[前][次][番号順一覧][スレッド一覧]

ruby-changes:11131

From: yugui <ko1@a...>
Date: Tue, 3 Mar 2009 20:39:32 +0900 (JST)
Subject: [ruby-changes:11131] Ruby:r22731 (ruby_1_9_1): merges r22505 and r22547 from trunk into ruby_1_9_1.

yugui	2009-03-03 20:39:19 +0900 (Tue, 03 Mar 2009)

  New Revision: 22731

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=22731

  Log:
    merges r22505 and r22547 from trunk into ruby_1_9_1.
    --
    * string.c (tr_trans): should not be affected by the encoding of
      replacement unless actually modified.  [ruby-talk:328967]
    --
    * string.c (tr_trans): should recalculate coderange.
      [ruby-core:22326] (reopened at [ruby-core:22328])

  Modified files:
    branches/ruby_1_9_1/ChangeLog
    branches/ruby_1_9_1/string.c
    branches/ruby_1_9_1/test/ruby/test_string.rb

Index: ruby_1_9_1/ChangeLog
===================================================================
--- ruby_1_9_1/ChangeLog	(revision 22730)
+++ ruby_1_9_1/ChangeLog	(revision 22731)
@@ -1,3 +1,13 @@
+Sun Feb 22 22:42:20 2009  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (tr_trans): should recalculate coderange.
+	  [ruby-core:22326] (reopened at [ruby-core:22328])
+
+Sun Feb 22 14:33:06 2009  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (tr_trans): should not be affected by the encoding of
+	  replacement unless actually modified.  [ruby-talk:328967]
+
 Fri Feb  6 12:11:24 2009  NAKAMURA Usaku  <usa@r...>
 
 	* ruby.c (process_options): set initial default_external before -r.
Index: ruby_1_9_1/string.c
===================================================================
--- ruby_1_9_1/string.c	(revision 22730)
+++ ruby_1_9_1/string.c	(revision 22731)
@@ -4603,6 +4603,10 @@
     int singlebyte = single_byte_optimizable(str);
     int cr;
 
+#define CHECK_IF_ASCII(c) \
+    (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
+	   (cr = ENC_CODERANGE_VALID) : 0)
+
     StringValue(src);
     StringValue(repl);
     if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
@@ -4674,6 +4678,8 @@
 	}
     }
 
+    if (cr == ENC_CODERANGE_VALID)
+	cr = ENC_CODERANGE_7BIT;
     str_modify_keep_cr(str);
     s = RSTRING_PTR(str); send = RSTRING_END(str);
     if (sflag) {
@@ -4682,8 +4688,10 @@
 	char *buf = ALLOC_N(char, max), *t = buf;
 
 	while (s < send) {
-	    c0 = c = rb_enc_codepoint(s, send, enc);
-	    tlen = clen = rb_enc_codelen(c, enc);
+	    int may_modify = 0;
+	    c0 = c = rb_enc_codepoint(s, send, e1);
+	    clen = rb_enc_codelen(c, e1);
+	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    s += clen;
 	    if (c < 256) {
@@ -4702,7 +4710,10 @@
 		c = errc;
 	    }
 	    if (c != -1) {
-		if (save == c) continue;
+		if (save == c) {
+		    CHECK_IF_ASCII(c);
+		    continue;
+		}
 		save = c;
 		tlen = rb_enc_codelen(c, enc);
 		modify = 1;
@@ -4710,6 +4721,7 @@
 	    else {
 		save = -1;
 		c = c0;
+		if (enc != e1) may_modify = 1;
 	    }
 	    while (t - buf + tlen >= max) {
 		offset = t - buf;
@@ -4718,6 +4730,10 @@
 		t = buf + offset;
 	    }
 	    rb_enc_mbcput(c, t, enc);
+	    if (may_modify && memcmp(s, t, tlen) != 0) {
+		modify = 1;
+	    }
+	    CHECK_IF_ASCII(c);
 	    t += tlen;
 	}
 	*t = '\0';
@@ -4740,6 +4756,7 @@
 		    modify = 1;
 		}
 	    }
+	    CHECK_IF_ASCII(c);
 	    s++;
 	}
     }
@@ -4749,8 +4766,10 @@
 	char *buf = ALLOC_N(char, max), *t = buf;
 
 	while (s < send) {
-	    c0 = c = rb_enc_codepoint(s, send, enc);
-	    tlen = clen = rb_enc_codelen(c, enc);
+	    int may_modify = 0;
+	    c0 = c = rb_enc_codepoint(s, send, e1);
+	    clen = rb_enc_codelen(c, e1);
+	    tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
 	    if (c < 256) {
 		c = trans[c];
@@ -4772,8 +4791,8 @@
 		modify = 1;
 	    }
 	    else {
-		modify = 1;
 		c = c0;
+		if (enc != e1) may_modify = 1;
 	    }
 	    while (t - buf + tlen >= max) {
 		offset = t - buf;
@@ -4781,7 +4800,13 @@
 		REALLOC_N(buf, char, max);
 		t = buf + offset;
 	    }
-	    if (s != t) rb_enc_mbcput(c, t, enc);
+	    if (s != t) {
+		rb_enc_mbcput(c, t, enc);
+		if (may_modify && memcmp(s, t, tlen) != 0) {
+		    modify = 1;
+		}
+	    }
+	    CHECK_IF_ASCII(c);
 	    s += clen;
 	    t += tlen;
 	}
@@ -4796,7 +4821,6 @@
     }
     
     if (modify) {
-	cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(repl));
 	if (cr != ENC_CODERANGE_BROKEN)
 	    ENC_CODERANGE_SET(str, cr);
 	rb_enc_associate(str, enc);
Index: ruby_1_9_1/test/ruby/test_string.rb
===================================================================
--- ruby_1_9_1/test/ruby/test_string.rb	(revision 22730)
+++ ruby_1_9_1/test/ruby/test_string.rb	(revision 22731)
@@ -1395,6 +1395,9 @@
     assert_equal(S("hippo"), S("hello").tr(S("el"), S("ip")))
     assert_equal(S("*e**o"), S("hello").tr(S("^aeiou"), S("*")))
     assert_equal(S("hal"),   S("ibm").tr(S("b-z"), S("a-z")))
+
+    a = "abc".force_encoding(Encoding::US_ASCII)
+    assert_equal(Encoding::US_ASCII, a.tr(S("z"), S("\u0101")).encoding)
   end
 
   def test_tr!
@@ -1415,11 +1418,17 @@
     a = S("ibm")
     assert_nil(a.tr!(S("B-Z"), S("A-Z")))
     assert_equal(S("ibm"), a)
+
+    a = "abc".force_encoding(Encoding::US_ASCII)
+    assert_nil(a.tr!(S("z"), S("\u0101")))
+    assert_equal(Encoding::US_ASCII, a.encoding)
   end
 
   def test_tr_s
     assert_equal(S("hypo"), S("hello").tr_s(S("el"), S("yp")))
     assert_equal(S("h*o"),  S("hello").tr_s(S("el"), S("*")))
+    assert_equal("a".hash, "\u0101\u0101".tr_s("\u0101", "a").hash)
+    assert_equal(true, "\u3041\u3041".tr("\u3041", "a").ascii_only?)
   end
 
   def test_tr_s!

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]