ruby-changes:54961
From: nobu <ko1@a...>
Date: Tue, 5 Mar 2019 09:32:22 +0900 (JST)
Subject: [ruby-changes:54961] nobu:r67167 (trunk): string.c: respect the actual encoding
nobu 2019-03-05 09:32:15 +0900 (Tue, 05 Mar 2019) New Revision: 67167 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=67167 Log: string.c: respect the actual encoding * string.c (rb_enc_str_coderange): respect the actual encoding if a BOM is present, and scan for the actual code range. [ruby-core:91662] [Bug #15635] Modified files: trunk/string.c trunk/test/ruby/test_m17n.rb Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 67166) +++ test/ruby/test_m17n.rb (revision 67167) @@ -269,6 +269,13 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L269 assert_empty(encs, bug10598) end + def test_utf_without_bom_valid + encs = [Encoding::UTF_16, Encoding::UTF_32].find_all {|enc| + !(+"abcd").encode!(enc).force_encoding(enc).valid_encoding? + } + assert_empty(encs) + end + def test_object_utf16_32_inspect EnvUtil.suppress_warning do begin Index: string.c =================================================================== --- string.c (revision 67166) +++ string.c (revision 67167) @@ -655,12 +655,13 @@ rb_enc_str_coderange(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L655 if (cr == ENC_CODERANGE_UNKNOWN) { int encidx = ENCODING_GET(str); rb_encoding *enc = rb_enc_from_index(encidx); - if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) { + if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc) && + rb_enc_mbminlen(enc = get_actual_encoding(encidx, str)) == 1) { cr = ENC_CODERANGE_BROKEN; } else { cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), - get_actual_encoding(encidx, str)); + enc); } ENC_CODERANGE_SET(str, cr); } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/