[前][次][番号順一覧][スレッド一覧]

ruby-changes:39792

From: nobu <ko1@a...>
Date: Wed, 16 Sep 2015 14:50:21 +0900 (JST)
Subject: [ruby-changes:39792] nobu:r51873 (trunk): string.c: keep coderange

nobu	2015-09-16 14:50:00 +0900 (Wed, 16 Sep 2015)

  New Revision: 51873

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51873

  Log:
    string.c: keep coderange
    
    * string.c (rb_str_setbyte): keep the code range as far as possible.

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n.rb
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 51872)
+++ ChangeLog	(revision 51873)
@@ -1,3 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Wed Sep 16 14:49:58 2015  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (rb_str_setbyte): keep the code range as possible.
+
 Wed Sep 16 13:23:48 2015  NAKAMURA Usaku  <usa@r...>
 
 	* doc/syntax/literals.rdoc (Strings): mention about ?a literal.
Index: string.c
===================================================================
--- string.c	(revision 51872)
+++ string.c	(revision 51873)
@@ -4702,15 +4702,56 @@ rb_str_setbyte(VALUE str, VALUE index, V https://github.com/ruby/ruby/blob/trunk/string.c#L4702
     long pos = NUM2LONG(index);
     int byte = NUM2INT(value);
     long len = RSTRING_LEN(str);
+    char *head, *ptr, *left = 0;
+    rb_encoding *enc;
+    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;
 
     if (pos < -len || len <= pos)
         rb_raise(rb_eIndexError, "index %ld out of string", pos);
     if (pos < 0)
         pos += len;
 
-    rb_str_modify(str);
+    if (!str_independent(str))
+	str_make_independent(str);
+    enc = STR_ENC_GET(str);
+    head = RSTRING_PTR(str);
+    ptr = &head[pos];
+    if (len > RSTRING_EMBED_LEN_MAX) {
+	cr = ENC_CODERANGE(str);
+	switch (cr) {
+	  case ENC_CODERANGE_7BIT:
+	    left = ptr;
+	    width = 1;
+	    break;
+	  case ENC_CODERANGE_VALID:
+	    left = rb_enc_left_char_head(head, ptr, head+len, enc);
+	    width = rb_enc_precise_mbclen(left, head+len, enc);
+	    break;
+	  default:
+	    ENC_CODERANGE_CLEAR(str);
+	}
+    }
+    else {
+	ENC_CODERANGE_CLEAR(str);
+    }
 
-    RSTRING_PTR(str)[pos] = byte;
+    *ptr = byte;
+
+    switch (cr) {
+      case ENC_CODERANGE_7BIT:
+	if (ISASCII(byte)) break;
+      case ENC_CODERANGE_VALID:
+	nlen = rb_enc_precise_mbclen(left, head+len, enc);
+	if (!MBCLEN_CHARFOUND_P(nlen))
+	    ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
+	else if (cr == ENC_CODERANGE_7BIT)
+	    ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+	else if (MBCLEN_CHARFOUND_LEN(nlen) != width)
+	    ENC_CODERANGE_CLEAR(str);
+	else if (ISASCII(byte)) /* may become 7BIT */
+	    ENC_CODERANGE_CLEAR(str);
+	break;
+    }
 
     return value;
 }
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb	(revision 51872)
+++ test/ruby/test_m17n.rb	(revision 51873)
@@ -1488,6 +1488,31 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L1488
     s = u("\xE3\x81\x82\xE3\x81\x84")
     s.setbyte(-4, 0x84)
     assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), s)
+
+    x = "x" * 100
+    t = nil
+    failure = proc {"#{i}: #{encdump(t)}"}
+
+    s = "\u{3042 3044}"
+    s.bytesize.times {|i|
+      t = s + x
+      t.setbyte(i, t.getbyte(i)+1)
+      assert_predicate(t, :valid_encoding?, failure)
+      assert_not_predicate(t, :ascii_only?, failure)
+      t = s + x
+      t.setbyte(i, 0x20)
+      assert_not_predicate(t, :valid_encoding?, failure)
+    }
+
+    s = "\u{41 42 43}"
+    s.bytesize.times {|i|
+      t = s + x
+      t.setbyte(i, 0x20)
+      assert_predicate(t, :valid_encoding?, failure)
+      assert_predicate(t, :ascii_only?, failure)
+      t.setbyte(i, 0xe3)
+      assert_not_predicate(t, :valid_encoding?, failure)
+    }
   end
 
   def test_compatible

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]