ruby-changes:47706
From: nagachika <ko1@a...>
Date: Sun, 10 Sep 2017 21:17:56 +0900 (JST)
Subject: [ruby-changes:47706] nagachika:r59822 (ruby_2_4): merge revision(s) 59763: [Backport #13874]
nagachika	2017-09-10 21:17:49 +0900 (Sun, 10 Sep 2017)

  New Revision: 59822

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59822

  Log:
    merge revision(s) 59763: [Backport #13874]

        string.c: fix false coderange

        * string.c (rb_enc_str_scrub): enc can differ from the actual
          encoding of the string, the cached coderange is useless then.
          [ruby-core:82674] [Bug #13874]

  Modified directories:
    branches/ruby_2_4/
  Modified files:
    branches/ruby_2_4/string.c
    branches/ruby_2_4/test/ruby/test_transcode.rb
    branches/ruby_2_4/version.h
Index: ruby_2_4/test/ruby/test_transcode.rb
===================================================================
--- ruby_2_4/test/ruby/test_transcode.rb	(revision 59821)
+++ ruby_2_4/test/ruby/test_transcode.rb	(revision 59822)
@@ -2094,17 +2094,19 @@ class TestTranscode < Test::Unit::TestCa https://github.com/ruby/ruby/blob/trunk/ruby_2_4/test/ruby/test_transcode.rb#L2094

   def test_valid_dummy_encoding
     bug9314 = '[ruby-core:59354] [Bug #9314]'
-    assert_separately(%W[- -- #{bug9314}], <<-'end;')
-    bug = ARGV.shift
-    result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
-    assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
-    result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
-    assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
+    assert_separately(%W[- -- #{bug9314}], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      bug = ARGV.shift
+      result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
+      assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
+      result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
+      assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
     end;
   end

   def test_loading_race
-    assert_separately([], <<-'end;') #do
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
       bug11277 = '[ruby-dev:49106] [Bug #11277]'
       num = 2
       th = (0...num).map do |i|
@@ -2121,6 +2123,17 @@ class TestTranscode < Test::Unit::TestCa https://github.com/ruby/ruby/blob/trunk/ruby_2_4/test/ruby/test_transcode.rb#L2123
     end;
   end

+  def test_scrub_encode_with_coderange
+    bug = '[ruby-core:82674] [Bug #13874]'
+    s = "\xe5".b
+    u = Encoding::UTF_8
+    assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+                 "should replace invalid byte")
+    assert_predicate(s, :valid_encoding?, "any char is valid in binary")
+    assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+                 "#{bug} coderange should not have side effects")
+  end
+
   def test_universal_newline
     bug11324 = '[ruby-core:69841] [Bug #11324]'
     usascii = Encoding::US_ASCII
Index: ruby_2_4/version.h
===================================================================
--- ruby_2_4/version.h	(revision 59821)
+++ ruby_2_4/version.h	(revision 59822)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_4/version.h#L1
 #define RUBY_VERSION "2.4.2"
 #define RUBY_RELEASE_DATE "2017-09-10"
-#define RUBY_PATCHLEVEL 194
+#define RUBY_PATCHLEVEL 195

 #define RUBY_RELEASE_YEAR 2017
 #define RUBY_RELEASE_MONTH 9
Index: ruby_2_4/string.c
===================================================================
--- ruby_2_4/string.c	(revision 59821)
+++ ruby_2_4/string.c	(revision 59822)
@@ -9221,6 +9221,8 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/ruby_2_4/string.c#L9221
     return str;
 }

+static VALUE enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr);
+
 /**
  * @param str the string to be scrubbed
  * @param repl the replacement character
@@ -9229,13 +9231,25 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/ruby_2_4/string.c#L9231
 VALUE
 rb_str_scrub(VALUE str, VALUE repl)
 {
-    return rb_enc_str_scrub(STR_ENC_GET(str), str, repl);
+    rb_encoding *enc = STR_ENC_GET(str);
+    return enc_str_scrub(enc, str, repl, ENC_CODERANGE(str));
 }

 VALUE
 rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
 {
-    int cr = ENC_CODERANGE(str);
+    int cr = ENC_CODERANGE_UNKNOWN;
+    if (enc == STR_ENC_GET(str)) {
+        /* cached coderange makes sense only when enc equals the
+         * actual encoding of str */
+        cr = ENC_CODERANGE(str);
+    }
+    return enc_str_scrub(enc, str, repl, cr);
+}
+
+static VALUE
+enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
+{
     int encidx;
     VALUE buf = Qnil;
     const char *rep;
Index: ruby_2_4
===================================================================
--- ruby_2_4	(revision 59821)
+++ ruby_2_4	(revision 59822)

Property changes on: ruby_2_4
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /trunk:r59763

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/