ruby-changes:30026
From: nobu <ko1@a...>
Date: Sat, 20 Jul 2013 12:14:18 +0900 (JST)
Subject: [ruby-changes:30026] nobu:r42078 (trunk): string.c: wchar succ
nobu 2013-07-20 12:14:09 +0900 (Sat, 20 Jul 2013) New Revision: 42078 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=42078 Log: string.c: wchar succ * string.c (enc_succ_char, enc_pred_char): consider wchar case. [ruby-core:56071] [Bug #8653] * string.c (rb_str_succ): do not replace with invalid char. Modified files: trunk/ChangeLog trunk/string.c trunk/test/ruby/test_m17n_comb.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 42077) +++ ChangeLog (revision 42078) @@ -1,4 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 -Sat Jul 20 12:13:37 2013 Nobuyoshi Nakada <nobu@r...> +Sat Jul 20 12:14:07 2013 Nobuyoshi Nakada <nobu@r...> + + * string.c (enc_succ_char, enc_pred_char): consider wchar case. + [ruby-core:56071] [Bug #8653] + + * string.c (rb_str_succ): do not replace with invalid char. * encoding.c (rb_enc_code_to_mbclen): add new function which returns mbclen from codepoint like as rb_enc_codelen() but 0 for invalid Index: string.c =================================================================== --- string.c (revision 42077) +++ string.c (revision 42078) @@ -2870,6 +2870,24 @@ enc_succ_char(char *p, long len, rb_enco https://github.com/ruby/ruby/blob/trunk/string.c#L2870 { long i; int l; + + if (rb_enc_mbminlen(enc) > 1) { + /* wchar, trivial case */ + int r = rb_enc_precise_mbclen(p, p + len, enc), c; + if (!MBCLEN_CHARFOUND_P(r)) { + return NEIGHBOR_NOT_CHAR; + } + c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1; + l = rb_enc_code_to_mbclen(c, enc); + if (!l) return NEIGHBOR_NOT_CHAR; + if (l != len) return NEIGHBOR_WRAPPED; + rb_enc_mbcput(c, p, enc); + r = rb_enc_precise_mbclen(p, p + len, enc); + if (!MBCLEN_CHARFOUND_P(r)) { + return NEIGHBOR_NOT_CHAR; + } + return NEIGHBOR_FOUND; + } while (1) { for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--) p[i] = '\0'; @@ -2904,6 +2922,25 @@ enc_pred_char(char *p, long len, rb_enco https://github.com/ruby/ruby/blob/trunk/string.c#L2922 { long i; int l; + if (rb_enc_mbminlen(enc) > 1) { + /* wchar, trivial case */ + int r = rb_enc_precise_mbclen(p, p + len, enc), c; + if (!MBCLEN_CHARFOUND_P(r)) { + return NEIGHBOR_NOT_CHAR; + } + c = rb_enc_mbc_to_codepoint(p, p + len, enc); + if (!c) return NEIGHBOR_NOT_CHAR; + --c; + l = rb_enc_code_to_mbclen(c, enc); + if (!l) return NEIGHBOR_NOT_CHAR; + if (l != len) return NEIGHBOR_WRAPPED; + rb_enc_mbcput(c, p, enc); + r = rb_enc_precise_mbclen(p, p + len, enc); + if (!MBCLEN_CHARFOUND_P(r)) { + return NEIGHBOR_NOT_CHAR; + } + return NEIGHBOR_FOUND; + } while (1) { for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--) p[i] = '\xff'; @@ -3074,12 +3111,16 @@ rb_str_succ(VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L3111 s = e; while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) { enum neighbor_char neighbor; + char tmp[ONIGENC_CODE_TO_MBC_MAXLEN]; l = rb_enc_precise_mbclen(s, e, enc); if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue; l = ONIGENC_MBCLEN_CHARFOUND_LEN(l); - neighbor = enc_succ_char(s, l, enc); - if (neighbor == NEIGHBOR_FOUND) + MEMCPY(tmp, s, char, l); + neighbor = enc_succ_char(tmp, l, enc); + if (neighbor == NEIGHBOR_FOUND) { + MEMCPY(s, tmp, char, l); return str; + } if (rb_enc_precise_mbclen(s, s+l, enc) != l) { /* wrapped to \0...\0. search next valid char. */ enc_succ_char(s, l, enc); Index: test/ruby/test_m17n_comb.rb =================================================================== --- test/ruby/test_m17n_comb.rb (revision 42077) +++ test/ruby/test_m17n_comb.rb (revision 42078) @@ -50,10 +50,12 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L50 # for transitivity test u("\xe0\xa0\xa1"), e("\xe0\xa0\xa1"), s("\xe0\xa0\xa1"), # [ruby-dev:32693] e("\xa1\xa1"), a("\xa1\xa1"), s("\xa1\xa1"), # [ruby-dev:36484] + ] - #"aa".force_encoding("utf-16be"), - #"aaaa".force_encoding("utf-32be"), - #"aaa".force_encoding("utf-32be"), + WSTRINGS = [ + "aa".force_encoding("utf-16be"), + "aaaa".force_encoding("utf-32be"), + "aaa".force_encoding("utf-32be"), ] def combination(*args, &b) @@ -84,7 +86,7 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L86 r end - def enccall(recv, meth, *args, &block) + def assert_enccall(recv, meth, *args, &block) desc = '' if String === recv desc << encdump(recv) @@ -113,6 +115,7 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L115 } result end + alias enccall assert_enccall def assert_str_enc_propagation(t, s1, s2) if !s1.ascii_only? @@ -1327,6 +1330,14 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L1330 s = t } } + + Encoding.list.each do |enc| + next if enc.dummy? + {"A"=>"B", "A1"=>"A2", "A9"=>"B0", "9"=>"10", "Z"=>"AA"}.each do |orig, expected| + s = orig.encode(enc) + assert_strenc(expected.encode(enc), enc, s.succ, proc {"#{orig.dump}.encode(#{enc}).succ"}) + end + end end def test_str_hash -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/