[前][次][番号順一覧][スレッド一覧]

ruby-changes:4999

From: ko1@a...
Date: Tue, 20 May 2008 16:59:58 +0900 (JST)
Subject: [ruby-changes:4999] matz - Ruby:r16492 (trunk): * string.c (rb_enc_strlen_cr): need to set ENC_CODERANGE_7BIT if

matz	2008-05-20 16:59:33 +0900 (Tue, 20 May 2008)

  New Revision: 16492

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_array.rb

  Log:
    * string.c (rb_enc_strlen_cr): needs to set ENC_CODERANGE_7BIT if
      search_nonascii() returns NULL.    [ruby-dev:34751]
    
    * string.c (rb_str_reverse): preserve coderange info if the
      receiver is a 7bit string.
    
    * string.c (rb_str_reverse_bang): ditto.
    
    * string.c (rb_str_reverse_bang): must call
      single_byte_optimizable() before rb_str_modify(), which clears
      coderange info.
    
    * string.c (tr_trans): handle single bytes more eagerly.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=16492&r2=16491&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=16492&r2=16491&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_array.rb?r1=16492&r2=16491&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 16491)
+++ ChangeLog	(revision 16492)
@@ -10,6 +10,22 @@
 
 	* vm.c (invoke_block_from_c): fix call flow.
 
+Tue May 20 08:38:56 2008  Yukihiro Matsumoto  <matz@r...>
+
+	* string.c (rb_enc_strlen_cr): need to set ENC_CODERANGE_7BIT if
+	  search_nonascii() fails.    [ruby-dev:34751]
+
+	* string.c (rb_str_reverse): preserve coderange info if the
+	  receiver is 7bit string.
+
+	* string.c (rb_str_reverse_bang): ditto.
+
+	* string.c (rb_str_reverse_bang): should have called
+	  single_byte_optimizable before rb_str_modify() that clears
+	  coderange info.
+
+	* string.c (tr_trans): handle single bytes more eagerly.
+
 Mon May 19 23:19:35 2008  Yusuke Endoh  <mame@t...>
 
 	* regexec.c (slow_search): check the case when the length is 1.
Index: string.c
===================================================================
--- string.c	(revision 16491)
+++ string.c	(revision 16492)
@@ -726,6 +726,7 @@
 	    if (ISASCII(*p)) {
 		q = search_nonascii(p, e);
 		if (!q) {
+		    if (!*cr) *cr = ENC_CODERANGE_7BIT;
 		    return c + (e - p);
 		}
 		c += q - p;
@@ -1237,7 +1238,7 @@
     rb_encoding *enc = STR_ENC_GET(str);
     VALUE str2;
     char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
-    int singlebyte = single_byte_optimizable(str);
+    int singlebyte;
 
     if (len < 0) return Qnil;
     if (!RSTRING_LEN(str)) {
@@ -1263,6 +1264,7 @@
     else if (beg > 0 && beg > str_strlen(str, enc)) {
 	return Qnil;
     }
+    singlebyte = single_byte_optimizable(str);
     if (len == 0) {
 	p = 0;
     }
@@ -3521,14 +3523,15 @@
 rb_str_reverse(VALUE str)
 {
     rb_encoding *enc;
-    VALUE obj;
+    VALUE rev;
     char *s, *e, *p;
+    int single = 1;
 
     if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
     enc = STR_ENC_GET(str);
-    obj = rb_str_new5(str, 0, RSTRING_LEN(str));
+    rev = rb_str_new5(str, 0, RSTRING_LEN(str));
     s = RSTRING_PTR(str); e = RSTRING_END(str);
-    p = RSTRING_END(obj);
+    p = RSTRING_END(rev);
 
     if (RSTRING_LEN(str) > 1) {
 	if (single_byte_optimizable(str)) {
@@ -3540,17 +3543,26 @@
 	    while (s < e) {
 		int clen = rb_enc_mbclen(s, e, enc);
 
+		if (clen > 1 || (*s & 0x80)) single = 0;
 		p -= clen;
 		memcpy(p, s, clen);
 		s += clen;
 	    }
 	}
     }
-    STR_SET_LEN(obj, RSTRING_LEN(str));
-    OBJ_INFECT(obj, str);
-    rb_enc_cr_str_copy_for_substr(obj, str);
+    STR_SET_LEN(rev, RSTRING_LEN(str));
+    OBJ_INFECT(rev, str);
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
+	if (single) {
+	    ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+	}
+	else {
+	    ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+	}
+    }
+    rb_enc_cr_str_copy_for_substr(rev, str);
 
-    return obj;
+    return rev;
 }
 
 
@@ -3564,19 +3576,25 @@
 static VALUE
 rb_str_reverse_bang(VALUE str)
 {
-    char *s, *e, c;
-
     if (RSTRING_LEN(str) > 1) {
-	rb_str_modify(str);
-	s = RSTRING_PTR(str);
-	e = RSTRING_END(str) - 1;
+	if (single_byte_optimizable(str)) {
+	    char *s, *e, c;
+	    int cr = ENC_CODERANGE(str);
+	    int single = 1;
 
-	if (single_byte_optimizable(str)) {
+	    rb_str_modify(str);
+	    s = RSTRING_PTR(str);
+	    e = RSTRING_END(str) - 1;
 	    while (s < e) {
 		c = *s;
+		if (*s & 0x80) single = 0;
 		*s++ = *e;
  		*e-- = c;
 	    }
+	    if (cr == ENC_CODERANGE_UNKNOWN && single) {
+		cr = ENC_CODERANGE_7BIT;
+	    }
+	    ENC_CODERANGE_SET(str, cr);
 	}
 	else {
 	    rb_str_shared_replace(str, rb_str_reverse(str));
@@ -4226,6 +4244,7 @@
     int c, c0, last = 0, modify = 0, i, l;
     char *s, *send;
     VALUE hash = 0;
+    int singlebyte = single_byte_optimizable(str);
 
     StringValue(src);
     StringValue(repl);
@@ -4288,6 +4307,7 @@
 	    if (r == -1) r = trrepl.now;
 	    if (c < 256) {
 		trans[c] = r;
+		if (r > 255) singlebyte = 0;
 	    }
 	    else {
 		if (!hash) hash = rb_hash_new();
@@ -4348,7 +4368,7 @@
 	STR_SET_NOEMBED(str);
 	RSTRING(str)->as.heap.aux.capa = max;
     }
-    else if (rb_enc_mbmaxlen(enc) == 1) {
+    else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
 	while (s < send) {
 	    c = (unsigned char)*s;
 	    if (trans[c] >= 0) {
Index: test/ruby/test_array.rb
===================================================================
--- test/ruby/test_array.rb	(revision 16491)
+++ test/ruby/test_array.rb	(revision 16492)
@@ -74,7 +74,7 @@
   end
 
   def test_split_0
-    x = "The Boassert of Mormon"
+    x = "The Book of Mormon"
     assert_equal(x.reverse, x.split(//).reverse!.join)
     assert_equal(x.reverse, x.reverse!)
     assert_equal("g:n:i:r:t:s: :e:t:y:b: :1", "1 byte string".split(//).reverse.join(":"))

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]