ruby-changes:72990
From: Jeremy <ko1@a...>
Date: Sat, 20 Aug 2022 12:23:55 +0900 (JST)
Subject: [ruby-changes:72990] 6f3857f6a7 (master): Support Encoding::Converter newline: :lf and :lf_newline options
https://git.ruby-lang.org/ruby.git/commit/?id=6f3857f6a7 From 6f3857f6a7b3cd6bd7e62e4efdbb1b841544e053 Mon Sep 17 00:00:00 2001 From: Jeremy Evans <code@j...> Date: Fri, 18 Jun 2021 16:05:15 -0700 Subject: Support Encoding::Converter newline: :lf and :lf_newline options Previously, newline: :lf was accepted but ignored. Where it should have been used was commented out code that didn't work, but unlike all other invalid values, using newline: :lf did not raise an error. This adds support for newline: :lf and :lf_newline, for consistency with newline: :cr and :cr_newline. This is basically the same as universal_newline, except that it only affects writing and not reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. Add tests for the File.open :newline option while here. Fixes [Bug #12436] --- enc/trans/newline.trans | 20 ++++++++++++++ include/ruby/internal/encoding/transcode.h | 18 ++++++++----- test/ruby/test_file.rb | 42 ++++++++++++++++++++++++++++++ test/ruby/test_transcode.rb | 2 ++ transcode.c | 30 ++++++++++++++++++++- 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 9e763407f9..95e082f5bd 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -17,10 +17,16 @@ https://github.com/ruby/ruby/blob/trunk/enc/trans/newline.trans#L17 map_cr["0a"] = "0d" transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") + + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline") %> <%= transcode_generated_code %> +#define lf_newline universal_newline #define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 @@ -126,10 +132,24 @@ rb_cr_newline = { https://github.com/ruby/ruby/blob/trunk/enc/trans/newline.trans#L132 0, 0, 0, 0 }; +static const rb_transcoder +rb_lf_newline = { + "", "lf_newline", lf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + 0, 0, 0, fun_so_universal_newline, + universal_newline_finish +}; + void Init_newline(void) { rb_register_transcoder(&rb_universal_newline); rb_register_transcoder(&rb_crlf_newline); rb_register_transcoder(&rb_cr_newline); + rb_register_transcoder(&rb_lf_newline); } diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h index 60c96a41c9..7f26d2eae9 100644 --- a/include/ruby/internal/encoding/transcode.h +++ b/include/ruby/internal/encoding/transcode.h @@ -476,16 +476,16 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L476 RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030, /** Decorators are there. */ - RUBY_ECONV_DECORATOR_MASK = 0x0000ff00, + RUBY_ECONV_DECORATOR_MASK = 0x0001ff00, /** Newline converters are there. */ - RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00, + RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00, /** (Unclear; seems unused). */ RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00, /** (Unclear; seems unused). */ - RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000, + RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000, /** Universal newline mode. */ RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100, @@ -496,11 +496,14 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L496 /** CRLF to CR conversion shall happen. */ RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000, + /** CRLF to LF conversion shall happen. */ + RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000, + /** Texts shall be XML-escaped. */ - RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000, + RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000, /** Texts shall be AttrValue escaped */ - RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000, + RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000, /** (Unclear; seems unused). */ RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000, @@ -529,6 +532,7 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L532 #define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */ #define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */ #define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */ +#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */ #define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */ #define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */ #define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */ @@ -543,10 +547,10 @@ enum ruby_econv_flag_type { https://github.com/ruby/ruby/blob/trunk/include/ruby/internal/encoding/transcode.h#L547 */ /** Indicates the input is a part of much larger one. */ - RUBY_ECONV_PARTIAL_INPUT = 0x00010000, + RUBY_ECONV_PARTIAL_INPUT = 0x00020000, /** Instructs the converter to stop after output. */ - RUBY_ECONV_AFTER_OUTPUT = 0x00020000, + RUBY_ECONV_AFTER_OUTPUT = 0x00040000, #define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */ #define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */ diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 905416911a..669b004b83 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_file.rb#L460 end end + def test_file_open_newline_option + Dir.mktmpdir(__method__.to_s) do |tmpdir| + path = File.join(tmpdir, "foo") + test = lambda do |newline| + File.open(path, "wt", newline: newline) do |f| + f.write "a\n" + f.puts "b" + end + File.binread(path) + end + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\r\nb\r\n", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + + test = lambda do |newline| + File.open(path, "rt", newline: newline) do |f| + f.read + end + end + + File.binwrite(path, "a\nb\n") + assert_equal("a\nb\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\nb\n", test.(:crlf)) + assert_equal("a\nb\n", test.(:cr)) + + File.binwrite(path, "a\r\nb\r\n") + assert_equal("a\r\nb\r\n", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + # Work on both Windows and non-Windows + assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf)) + assert_equal("a\r\nb\r\n", test.(:cr)) + + File.binwrite(path, "a\rb\r") + assert_equal("a\rb\r", test.(:lf)) + assert_equal("a\nb\n", test.(:universal)) + assert_equal("a\rb\r", test.(:crlf)) + assert_equal("a\rb\r", test.(:cr)) + end + end + def test_open_nul Dir.mktmpdir(__method__.to_s) do |tmpdir| path = File.join(tmpdir, "foo") diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index c8b0034e06..73737be0ad 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_transcode.rb#L2305 assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) + assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true)) + assert_equal("A\nB\nC", s.encode(usascii, newline: :lf)) end end diff --git a/transcode.c b/transcode.c index 5fafad398f..535e436b03 100644 --- a/transcode.c +++ b/transcode.c @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; https://github.com/ruby/ruby/blob/trunk/transcode.c#L47 static VALUE sym_universal_newline; static VALUE sym_crlf_newline; static VALUE sym_cr_newline; +static VALUE sym_lf_newline; #ifdef ENABLE_ECONV_NEWLINE_OPTION static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; #endif @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) https://github.com/ruby/ruby/blob/trunk/transcode.c#L1040 case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR: + case ECONV_LF_NEWLINE_DECORATOR: case 0: break; default: @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) https://github.com/ruby/ruby/blob/trunk/transcode.c#L1064 decorators_ret[num_decorators++] = "crlf_newline"; if (ecflags & ECONV_CR_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "lf_newline"; if ( (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/