ruby-changes:11131
From: yugui <ko1@a...>
Date: Tue, 3 Mar 2009 20:39:32 +0900 (JST)
Subject: [ruby-changes:11131] Ruby:r22731 (ruby_1_9_1): merges r22505 and r22547 from trunk into ruby_1_9_1.

yugui 2009-03-03 20:39:19 +0900 (Tue, 03 Mar 2009)

  New Revision: 22731

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=22731

  Log:
    merges r22505 and r22547 from trunk into ruby_1_9_1.
    --
    * string.c (tr_trans): should not be affected by the encoding of
      replacement unless actually modified. [ruby-talk:328967]
    --
    * string.c (tr_trans): should recalculate coderange.
      [ruby-core:22326] (reopened at [ruby-core:22328])

  Modified files:
    branches/ruby_1_9_1/ChangeLog
    branches/ruby_1_9_1/string.c
    branches/ruby_1_9_1/test/ruby/test_string.rb

Index: ruby_1_9_1/ChangeLog
===================================================================
--- ruby_1_9_1/ChangeLog (revision 22730)
+++ ruby_1_9_1/ChangeLog (revision 22731)
@@ -1,3 +1,13 @@
+Sun Feb 22 22:42:20 2009 Nobuyoshi Nakada <nobu@r...>
+
+        * string.c (tr_trans): should recalculate coderange.
+          [ruby-core:22326] (reopened at [ruby-core:22328])
+
+Sun Feb 22 14:33:06 2009 Nobuyoshi Nakada <nobu@r...>
+
+        * string.c (tr_trans): should not be affected by the encoding of
+          replacement unless actually modified. [ruby-talk:328967]
+
 Fri Feb 6 12:11:24 2009 NAKAMURA Usaku <usa@r...>
 
         * ruby.c (process_options): set initial default_external before -r.
Index: ruby_1_9_1/string.c
===================================================================
--- ruby_1_9_1/string.c (revision 22730)
+++ ruby_1_9_1/string.c (revision 22731)
@@ -4603,6 +4603,10 @@
     int singlebyte = single_byte_optimizable(str);
     int cr;
 
+#define CHECK_IF_ASCII(c) \
+    (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
+           (cr = ENC_CODERANGE_VALID) : 0)
+
     StringValue(src);
     StringValue(repl);
     if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
@@ -4674,6 +4678,8 @@
         }
     }
 
+    if (cr == ENC_CODERANGE_VALID)
+        cr = ENC_CODERANGE_7BIT;
     str_modify_keep_cr(str);
     s = RSTRING_PTR(str); send = RSTRING_END(str);
     if (sflag) {
@@ -4682,8 +4688,10 @@
         char *buf = ALLOC_N(char, max), *t = buf;
 
         while (s < send) {
-            c0 = c = rb_enc_codepoint(s, send, enc);
-            tlen = clen = rb_enc_codelen(c, enc);
+            int may_modify = 0;
+            c0 = c = rb_enc_codepoint(s, send, e1);
+            clen = rb_enc_codelen(c, e1);
+            tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
             s += clen;
             if (c < 256) {
@@ -4702,7 +4710,10 @@
                 c = errc;
             }
             if (c != -1) {
-                if (save == c) continue;
+                if (save == c) {
+                    CHECK_IF_ASCII(c);
+                    continue;
+                }
                 save = c;
                 tlen = rb_enc_codelen(c, enc);
                 modify = 1;
@@ -4710,6 +4721,7 @@
             else {
                 save = -1;
                 c = c0;
+                if (enc != e1) may_modify = 1;
             }
             while (t - buf + tlen >= max) {
                 offset = t - buf;
@@ -4718,6 +4730,10 @@
                 t = buf + offset;
             }
             rb_enc_mbcput(c, t, enc);
+            if (may_modify && memcmp(s, t, tlen) != 0) {
+                modify = 1;
+            }
+            CHECK_IF_ASCII(c);
             t += tlen;
         }
         *t = '\0';
@@ -4740,6 +4756,7 @@
                     modify = 1;
                 }
             }
+            CHECK_IF_ASCII(c);
             s++;
         }
     }
@@ -4749,8 +4766,10 @@
         char *buf = ALLOC_N(char, max), *t = buf;
 
         while (s < send) {
-            c0 = c = rb_enc_codepoint(s, send, enc);
-            tlen = clen = rb_enc_codelen(c, enc);
+            int may_modify = 0;
+            c0 = c = rb_enc_codepoint(s, send, e1);
+            clen = rb_enc_codelen(c, e1);
+            tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
             if (c < 256) {
                 c = trans[c];
@@ -4772,8 +4791,8 @@
                 modify = 1;
             }
             else {
-                modify = 1;
                 c = c0;
+                if (enc != e1) may_modify = 1;
             }
             while (t - buf + tlen >= max) {
                 offset = t - buf;
@@ -4781,7 +4800,13 @@
                 REALLOC_N(buf, char, max);
                 t = buf + offset;
             }
-            if (s != t) rb_enc_mbcput(c, t, enc);
+            if (s != t) {
+                rb_enc_mbcput(c, t, enc);
+                if (may_modify && memcmp(s, t, tlen) != 0) {
+                    modify = 1;
+                }
+            }
+            CHECK_IF_ASCII(c);
             s += clen;
             t += tlen;
         }
@@ -4796,7 +4821,6 @@
     }
 
     if (modify) {
-        cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(repl));
         if (cr != ENC_CODERANGE_BROKEN)
             ENC_CODERANGE_SET(str, cr);
         rb_enc_associate(str, enc);
Index: ruby_1_9_1/test/ruby/test_string.rb
===================================================================
--- ruby_1_9_1/test/ruby/test_string.rb (revision 22730)
+++ ruby_1_9_1/test/ruby/test_string.rb (revision 22731)
@@ -1395,6 +1395,9 @@
     assert_equal(S("hippo"), S("hello").tr(S("el"), S("ip")))
     assert_equal(S("*e**o"), S("hello").tr(S("^aeiou"), S("*")))
     assert_equal(S("hal"), S("ibm").tr(S("b-z"), S("a-z")))
+
+    a = "abc".force_encoding(Encoding::US_ASCII)
+    assert_equal(Encoding::US_ASCII, a.tr(S("z"), S("\u0101")).encoding)
   end
 
   def test_tr!
@@ -1415,11 +1418,17 @@
     a = S("ibm")
     assert_nil(a.tr!(S("B-Z"), S("A-Z")))
     assert_equal(S("ibm"), a)
+
+    a = "abc".force_encoding(Encoding::US_ASCII)
+    assert_nil(a.tr!(S("z"), S("\u0101")))
+    assert_equal(Encoding::US_ASCII, a.encoding)
   end
 
   def test_tr_s
     assert_equal(S("hypo"), S("hello").tr_s(S("el"), S("yp")))
     assert_equal(S("h*o"), S("hello").tr_s(S("el"), S("*")))
+    assert_equal("a".hash, "\u0101\u0101".tr_s("\u0101", "a").hash)
+    assert_equal(true, "\u3041\u3041".tr("\u3041", "a").ascii_only?)
   end
 
   def test_tr_s!

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/