[前][次][番号順一覧][スレッド一覧]

ruby-changes:72990

From: Jeremy <ko1@a...>
Date: Sat, 20 Aug 2022 12:23:55 +0900 (JST)
Subject: [ruby-changes:72990] 6f3857f6a7 (master): Support Encoding::Converter newline: :lf and :lf_newline options

https://git.ruby-lang.org/ruby.git/commit/?id=6f3857f6a7

From 6f3857f6a7b3cd6bd7e62e4efdbb1b841544e053 Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@j...>
Date: Fri, 18 Jun 2021 16:05:15 -0700
Subject: Support Encoding::Converter newline: :lf and :lf_newline options

Previously, newline: :lf was accepted but silently ignored.  The
code that should have handled it was commented out and did not work,
yet unlike every other invalid value, newline: :lf did not raise
an error.

This adds support for newline: :lf and :lf_newline, for consistency
with newline: :cr and :cr_newline.  This is basically the same as
universal_newline, except that it only affects writing and not
reading, because its flag falls within RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK.

Add tests for the File.open :newline option while here.

Fixes [Bug #12436]
---
 enc/trans/newline.trans                    | 20 ++++++++++++++
 include/ruby/internal/encoding/transcode.h | 18 ++++++++-----
 test/ruby/test_file.rb                     | 42 ++++++++++++++++++++++++++++++
 test/ruby/test_transcode.rb                |  2 ++
 transcode.c                                | 30 ++++++++++++++++++++-
 5 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index 9e763407f9..95e082f5bd 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -17,10 +17,16 @@ https://github.com/ruby/ruby/blob/trunk/enc/trans/newline.trans#L17
   map_cr["0a"] = "0d"
 
   transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
+
+  map_normalize = {}
+  map_normalize["{00-ff}"] = :func_so
+
+  transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
 %>
 
 <%= transcode_generated_code %>
 
+#define lf_newline universal_newline
 #define STATE (sp[0])
 #define NORMAL 0
 #define JUST_AFTER_CR 1
@@ -126,10 +132,24 @@ rb_cr_newline = { https://github.com/ruby/ruby/blob/trunk/enc/trans/newline.trans#L132
     0, 0, 0, 0
 };
 
+static const rb_transcoder
+rb_lf_newline = {
+    "", "lf_newline", lf_newline,
+    TRANSCODE_TABLE_INFO,
+    1, /* input_unit_length */
+    1, /* max_input */
+    2, /* max_output */
+    asciicompat_converter, /* asciicompat_type */
+    2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
+    0, 0, 0, fun_so_universal_newline,
+    universal_newline_finish
+};
+
 void
 Init_newline(void)
 {
     rb_register_transcoder(&rb_universal_newline);
     rb_register_transcoder(&rb_crlf_newline);
     rb_register_transcoder(&rb_cr_newline);
+    rb_register_transcoder(&rb_lf_newline);
 }
diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h
index 60c96a41c9..7f26d2eae9 100644
--- a/include/ruby/internal/encoding/transcode.h
+++ b/include/ruby/internal/encoding/transcode.h
@@ -476,16 +476,16 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L476
     RUBY_ECONV_UNDEF_HEX_CHARREF                = 0x00000030,
 
     /** Decorators are there. */
-    RUBY_ECONV_DECORATOR_MASK                   = 0x0000ff00,
+    RUBY_ECONV_DECORATOR_MASK                   = 0x0001ff00,
 
     /** Newline converters are there. */
-    RUBY_ECONV_NEWLINE_DECORATOR_MASK           = 0x00003f00,
+    RUBY_ECONV_NEWLINE_DECORATOR_MASK           = 0x00007f00,
 
     /** (Unclear; seems unused). */
     RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK      = 0x00000f00,
 
     /** (Unclear; seems unused). */
-    RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK     = 0x00003000,
+    RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK     = 0x00007000,
 
     /** Universal newline mode. */
     RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR      = 0x00000100,
@@ -496,11 +496,14 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L496
     /** CRLF to CR conversion shall happen. */
     RUBY_ECONV_CR_NEWLINE_DECORATOR             = 0x00002000,
 
+    /** CRLF to LF conversion shall happen. */
+    RUBY_ECONV_LF_NEWLINE_DECORATOR             = 0x00004000,
+
     /** Texts shall be XML-escaped. */
-    RUBY_ECONV_XML_TEXT_DECORATOR               = 0x00004000,
+    RUBY_ECONV_XML_TEXT_DECORATOR               = 0x00008000,
 
     /** Texts shall be AttrValue escaped */
-    RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR       = 0x00008000,
+    RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR       = 0x00010000,
 
     /** (Unclear; seems unused). */
     RUBY_ECONV_STATEFUL_DECORATOR_MASK          = 0x00f00000,
@@ -529,6 +532,7 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L532
 #define ECONV_UNIVERSAL_NEWLINE_DECORATOR       RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR  /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
 #define ECONV_CRLF_NEWLINE_DECORATOR            RUBY_ECONV_CRLF_NEWLINE_DECORATOR       /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
 #define ECONV_CR_NEWLINE_DECORATOR              RUBY_ECONV_CR_NEWLINE_DECORATOR         /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
+#define ECONV_LF_NEWLINE_DECORATOR              RUBY_ECONV_LF_NEWLINE_DECORATOR         /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
 #define ECONV_XML_TEXT_DECORATOR                RUBY_ECONV_XML_TEXT_DECORATOR           /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
 #define ECONV_XML_ATTR_CONTENT_DECORATOR        RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR   /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
 #define ECONV_STATEFUL_DECORATOR_MASK           RUBY_ECONV_STATEFUL_DECORATOR_MASK      /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
@@ -543,10 +547,10 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L547
      */
 
     /** Indicates the input is a part of much larger one. */
-    RUBY_ECONV_PARTIAL_INPUT                    = 0x00010000,
+    RUBY_ECONV_PARTIAL_INPUT                    = 0x00020000,
 
     /** Instructs the converter to stop after output. */
-    RUBY_ECONV_AFTER_OUTPUT                     = 0x00020000,
+    RUBY_ECONV_AFTER_OUTPUT                     = 0x00040000,
 #define ECONV_PARTIAL_INPUT                     RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
 #define ECONV_AFTER_OUTPUT                      RUBY_ECONV_AFTER_OUTPUT  /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
 
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 905416911a..669b004b83 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_file.rb#L460
     end
   end
 
+  def test_file_open_newline_option
+    Dir.mktmpdir(__method__.to_s) do |tmpdir|
+      path = File.join(tmpdir, "foo")
+      test = lambda do |newline|
+        File.open(path, "wt", newline: newline) do |f|
+          f.write "a\n"
+          f.puts "b"
+        end
+        File.binread(path)
+      end
+      assert_equal("a\nb\n", test.(:lf))
+      assert_equal("a\nb\n", test.(:universal))
+      assert_equal("a\r\nb\r\n", test.(:crlf))
+      assert_equal("a\rb\r", test.(:cr))
+
+      test = lambda do |newline|
+        File.open(path, "rt", newline: newline) do |f|
+          f.read
+        end
+      end
+
+      File.binwrite(path, "a\nb\n")
+      assert_equal("a\nb\n", test.(:lf))
+      assert_equal("a\nb\n", test.(:universal))
+      assert_equal("a\nb\n", test.(:crlf))
+      assert_equal("a\nb\n", test.(:cr))
+
+      File.binwrite(path, "a\r\nb\r\n")
+      assert_equal("a\r\nb\r\n", test.(:lf))
+      assert_equal("a\nb\n", test.(:universal))
+      # Work on both Windows and non-Windows
+      assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf))
+      assert_equal("a\r\nb\r\n", test.(:cr))
+
+      File.binwrite(path, "a\rb\r")
+      assert_equal("a\rb\r", test.(:lf))
+      assert_equal("a\nb\n", test.(:universal))
+      assert_equal("a\rb\r", test.(:crlf))
+      assert_equal("a\rb\r", test.(:cr))
+    end
+  end
+
   def test_open_nul
     Dir.mktmpdir(__method__.to_s) do |tmpdir|
       path = File.join(tmpdir, "foo")
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index c8b0034e06..73737be0ad 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_transcode.rb#L2305
     assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr))
     assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true))
     assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf))
+    assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true))
+    assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
   end
 end
diff --git a/transcode.c b/transcode.c
index 5fafad398f..535e436b03 100644
--- a/transcode.c
+++ b/transcode.c
@@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; https://github.com/ruby/ruby/blob/trunk/transcode.c#L47
 static VALUE sym_universal_newline;
 static VALUE sym_crlf_newline;
 static VALUE sym_cr_newline;
+static VALUE sym_lf_newline;
 #ifdef ENABLE_ECONV_NEWLINE_OPTION
 static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
 #endif
@@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) https://github.com/ruby/ruby/blob/trunk/transcode.c#L1040
       case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
       case ECONV_CRLF_NEWLINE_DECORATOR:
       case ECONV_CR_NEWLINE_DECORATOR:
+      case ECONV_LF_NEWLINE_DECORATOR:
       case 0:
         break;
       default:
@@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) https://github.com/ruby/ruby/blob/trunk/transcode.c#L1064
         decorators_ret[num_decorators++] = "crlf_newline";
     if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
         decorators_ret[num_decorators++] = "cr_newline";
+    if (ecflags & ECONV_LF_NEWLINE_DECORATOR)
+        decorators_ret[num_decorators++] = "lf_newline";
     if ( (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]