[前][次][番号順一覧][スレッド一覧]

ruby-changes:47647

From: nobu <ko1@a...>
Date: Wed, 6 Sep 2017 22:11:49 +0900 (JST)
Subject: [ruby-changes:47647] nobu:r59763 (trunk): string.c: fix false coderange

nobu	2017-09-06 22:11:44 +0900 (Wed, 06 Sep 2017)

  New Revision: 59763

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59763

  Log:
    string.c: fix false coderange
    
    * string.c (rb_enc_str_scrub): enc can differ from the actual
      encoding of the string; the cached coderange is useless then.
      [ruby-core:82674] [Bug #13874]

  Modified files:
    trunk/string.c
    trunk/test/ruby/test_transcode.rb
Index: string.c
===================================================================
--- string.c	(revision 59762)
+++ string.c	(revision 59763)
@@ -9553,6 +9553,8 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/string.c#L9553
     return str;
 }
 
+static VALUE enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr);
+
 /**
  * @param str the string to be scrubbed
  * @param repl the replacement character
@@ -9561,13 +9563,25 @@ str_compat_and_valid(VALUE str, rb_encod https://github.com/ruby/ruby/blob/trunk/string.c#L9563
 VALUE
 rb_str_scrub(VALUE str, VALUE repl)
 {
-    return rb_enc_str_scrub(STR_ENC_GET(str), str, repl);
+    rb_encoding *enc = STR_ENC_GET(str);
+    return enc_str_scrub(enc, str, repl, ENC_CODERANGE(str));
 }
 
 VALUE
 rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
 {
-    int cr = ENC_CODERANGE(str);
+    int cr = ENC_CODERANGE_UNKNOWN;
+    if (enc == STR_ENC_GET(str)) {
+	/* cached coderange makes sense only when enc equals the
+	 * actual encoding of str */
+	cr = ENC_CODERANGE(str);
+    }
+    return enc_str_scrub(enc, str, repl, cr);
+}
+
+static VALUE
+enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)
+{
     int encidx;
     VALUE buf = Qnil;
     const char *rep;
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb	(revision 59762)
+++ test/ruby/test_transcode.rb	(revision 59763)
@@ -2180,17 +2180,19 @@ class TestTranscode < Test::Unit::TestCa https://github.com/ruby/ruby/blob/trunk/test/ruby/test_transcode.rb#L2180
 
   def test_valid_dummy_encoding
     bug9314 = '[ruby-core:59354] [Bug #9314]'
-    assert_separately(%W[- -- #{bug9314}], <<-'end;')
-    bug = ARGV.shift
-    result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
-    assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
-    result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
-    assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
+    assert_separately(%W[- -- #{bug9314}], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      bug = ARGV.shift
+      result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_16)}
+      assert_equal("\xFE\xFF\x00t\x00e\x00s\x00t", result.b, bug)
+      result = assert_nothing_raised(TypeError, bug) {break "test".encode(Encoding::UTF_32)}
+      assert_equal("\x00\x00\xFE\xFF\x00\x00\x00t\x00\x00\x00e\x00\x00\x00s\x00\x00\x00t", result.b, bug)
     end;
   end
 
   def test_loading_race
-    assert_separately([], <<-'end;') #do
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
       bug11277 = '[ruby-dev:49106] [Bug #11277]'
       num = 2
       th = (0...num).map do |i|
@@ -2207,6 +2209,17 @@ class TestTranscode < Test::Unit::TestCa https://github.com/ruby/ruby/blob/trunk/test/ruby/test_transcode.rb#L2209
     end;
   end
 
+  def test_scrub_encode_with_coderange
+    bug = '[ruby-core:82674] [Bug #13874]'
+    s = "\xe5".b
+    u = Encoding::UTF_8
+    assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+                 "should replace invalid byte")
+    assert_predicate(s, :valid_encoding?, "any char is valid in binary")
+    assert_equal("?", s.encode(u, u, invalid: :replace, replace: "?"),
+                 "#{bug} coderange should not have side effects")
+  end
+
   def test_universal_newline
     bug11324 = '[ruby-core:69841] [Bug #11324]'
     usascii = Encoding::US_ASCII

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]