ruby-changes:30026

nobu	2013-07-20 12:14:09 +0900 (Sat, 20 Jul 2013)

  New Revision: 42078

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=42078

  Log:
    string.c: wchar succ
    
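    * string.c (enc_succ_char, enc_pred_char): handle wide-character
      encodings (minimum character length > 1, e.g. UTF-16/UTF-32) by
      incrementing/decrementing the codepoint instead of raw bytes.
      [ruby-core:56071] [Bug #8653]
    * string.c (rb_str_succ): try the successor in a temporary buffer
      and copy it back only on NEIGHBOR_FOUND, instead of overwriting
      the string in place with a possibly invalid char.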

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n_comb.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 42077)
+++ ChangeLog	(revision 42078)
@@ -1,4 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
-Sat Jul 20 12:13:37 2013  Nobuyoshi Nakada  <nobu@r...>
+Sat Jul 20 12:14:07 2013  Nobuyoshi Nakada  <nobu@r...>
+
+	* string.c (enc_succ_char, enc_pred_char): consider wchar case.
+	  [ruby-core:56071] [Bug #8653]
+
+	* string.c (rb_str_succ): do not replace with invalid char.
 
 	* encoding.c (rb_enc_code_to_mbclen): add new function which returns
 	  mbclen from codepoint like as rb_enc_codelen() but 0 for invalid
Index: string.c
===================================================================
--- string.c	(revision 42077)
+++ string.c	(revision 42078)
@@ -2870,6 +2870,24 @@ enc_succ_char(char *p, long len, rb_enco https://github.com/ruby/ruby/blob/trunk/string.c#L2870
 {
     long i;
     int l;
+
+    if (rb_enc_mbminlen(enc) > 1) {
+	/* wchar, trivial case */
+	int r = rb_enc_precise_mbclen(p, p + len, enc), c;
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    return NEIGHBOR_NOT_CHAR;
+	}
+	c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
+	l = rb_enc_code_to_mbclen(c, enc);
+	if (!l) return NEIGHBOR_NOT_CHAR;
+	if (l != len) return NEIGHBOR_WRAPPED;
+	rb_enc_mbcput(c, p, enc);
+	r = rb_enc_precise_mbclen(p, p + len, enc);
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    return NEIGHBOR_NOT_CHAR;
+	}
+	return NEIGHBOR_FOUND;
+    }
     while (1) {
         for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
             p[i] = '\0';
@@ -2904,6 +2922,25 @@ enc_pred_char(char *p, long len, rb_enco https://github.com/ruby/ruby/blob/trunk/string.c#L2922
 {
     long i;
     int l;
+    if (rb_enc_mbminlen(enc) > 1) {
+	/* wchar, trivial case */
+	int r = rb_enc_precise_mbclen(p, p + len, enc), c;
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    return NEIGHBOR_NOT_CHAR;
+	}
+	c = rb_enc_mbc_to_codepoint(p, p + len, enc);
+	if (!c) return NEIGHBOR_NOT_CHAR;
+	--c;
+	l = rb_enc_code_to_mbclen(c, enc);
+	if (!l) return NEIGHBOR_NOT_CHAR;
+	if (l != len) return NEIGHBOR_WRAPPED;
+	rb_enc_mbcput(c, p, enc);
+	r = rb_enc_precise_mbclen(p, p + len, enc);
+	if (!MBCLEN_CHARFOUND_P(r)) {
+	    return NEIGHBOR_NOT_CHAR;
+	}
+	return NEIGHBOR_FOUND;
+    }
     while (1) {
         for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
             p[i] = '\xff';
@@ -3074,12 +3111,16 @@ rb_str_succ(VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L3111
 	s = e;
 	while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
             enum neighbor_char neighbor;
+	    char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
 	    l = rb_enc_precise_mbclen(s, e, enc);
 	    if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
 	    l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
-            neighbor = enc_succ_char(s, l, enc);
-            if (neighbor == NEIGHBOR_FOUND)
+	    MEMCPY(tmp, s, char, l);
+	    neighbor = enc_succ_char(tmp, l, enc);
+	    if (neighbor == NEIGHBOR_FOUND) {
+		MEMCPY(s, tmp, char, l);
                 return str;
+	    }
             if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
                 /* wrapped to \0...\0.  search next valid char. */
                 enc_succ_char(s, l, enc);
Index: test/ruby/test_m17n_comb.rb
===================================================================
--- test/ruby/test_m17n_comb.rb	(revision 42077)
+++ test/ruby/test_m17n_comb.rb	(revision 42078)
@@ -50,10 +50,12 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L50
     # for transitivity test
     u("\xe0\xa0\xa1"), e("\xe0\xa0\xa1"), s("\xe0\xa0\xa1"), # [ruby-dev:32693]
     e("\xa1\xa1"), a("\xa1\xa1"), s("\xa1\xa1"), # [ruby-dev:36484]
+  ]
 
-    #"aa".force_encoding("utf-16be"),
-    #"aaaa".force_encoding("utf-32be"),
-    #"aaa".force_encoding("utf-32be"),
+  WSTRINGS = [
+    "aa".force_encoding("utf-16be"),
+    "aaaa".force_encoding("utf-32be"),
+    "aaa".force_encoding("utf-32be"),
   ]
 
   def combination(*args, &b)
@@ -84,7 +86,7 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L86
     r
   end
 
-  def enccall(recv, meth, *args, &block)
+  def assert_enccall(recv, meth, *args, &block)
     desc = ''
     if String === recv
       desc << encdump(recv)
@@ -113,6 +115,7 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L115
     }
     result
   end
+  alias enccall assert_enccall
 
   def assert_str_enc_propagation(t, s1, s2)
     if !s1.ascii_only?
@@ -1327,6 +1330,14 @@ class TestM17NComb < Test::Unit::TestCas https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n_comb.rb#L1330
         s = t
       }
     }
+
+    Encoding.list.each do |enc|
+      next if enc.dummy?
+      {"A"=>"B", "A1"=>"A2", "A9"=>"B0", "9"=>"10", "Z"=>"AA"}.each do |orig, expected|
+        s = orig.encode(enc)
+        assert_strenc(expected.encode(enc), enc, s.succ, proc {"#{orig.dump}.encode(#{enc}).succ"})
+      end
+    end
   end
 
   def test_str_hash

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/