[前][次][番号順一覧][スレッド一覧]

ruby-changes:4014

From: ko1@a...
Date: Sat, 16 Feb 2008 16:16:53 +0900 (JST)
Subject: [ruby-changes:4014] akr - Ruby:r15504 (trunk): * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.

akr	2008-02-16 16:16:36 +0900 (Sat, 16 Feb 2008)

  New Revision: 15504

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
      (str_strlen): UTF-8 character count is only applicable to valid
      UTF-8 strings.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15504&r2=15503&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15504&r2=15503&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15503)
+++ ChangeLog	(revision 15504)
@@ -1,3 +1,9 @@
+Sat Feb 16 16:14:35 2008  Tanaka Akira  <akr@f...>
+
+	* string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
+	  (str_strlen): UTF-8 character count is only applicable for valid
+	  UTF-8 string.
+
 Sat Feb 16 13:16:49 2008  Tanaka Akira  <akr@f...>
 
 	* string.c (rb_str_sub_bang): stringize replacing hash values.
Index: string.c
===================================================================
--- string.c	(revision 15503)
+++ string.c	(revision 15504)
@@ -597,35 +597,7 @@
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
         return (e - p) / rb_enc_mbminlen(enc);
     }
-#ifdef NONASCII_MASK
-    else if (enc == rb_utf8_encoding()) {
-	if (sizeof(long) * 2 < e - p) {
-	    const unsigned long *s, *t;
-	    const VALUE lowbits = sizeof(unsigned long) - 1;
-	    s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-	    t = (const unsigned long*)(~lowbits & (VALUE)e);
-	    for (c=0; p<(const char *)s; p++) {
-		if (((*p)&0xC0) != 0x80) c++;
-	    }
-	    while (s < t) {
-		unsigned long d = *s;
-		d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
-		d = (d>>7) + (d>>15);
-		d = d + (d>>16);
-#if NONASCII_MASK == 0x8080808080808080UL
-		d = d + (d>>32);
-#endif
-		c += (long)(d&0xF);
-		s++;
-	    }
-	    p = (const char *)t;
-	}
-	for (; p<e; p++) {
-	    if (((*p)&0xC0) != 0x80) c++;
-	}
-	return c;
-    }
-#endif
+
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
         while (p < e) {
@@ -658,6 +630,37 @@
     if (!enc) enc = STR_ENC_GET(str);
     p = RSTRING_PTR(str);
     e = RSTRING_END(str);
+#ifdef NONASCII_MASK
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+        enc == rb_utf8_encoding()) {
+        len = 0;
+	if (sizeof(long) * 2 < e - p) {
+	    const unsigned long *s, *t;
+	    const VALUE lowbits = sizeof(unsigned long) - 1;
+	    s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+	    t = (const unsigned long*)(~lowbits & (VALUE)e);
+	    for (len=0; p<(const char *)s; p++) {
+		if (((*p)&0xC0) != 0x80) len++;
+	    }
+	    while (s < t) {
+		unsigned long d = *s;
+		d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
+		d = (d>>7) + (d>>15);
+		d = d + (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+		d = d + (d>>32);
+#endif
+		len += (long)(d&0xF);
+		s++;
+	    }
+	    p = (const char *)t;
+	}
+	for (; p<e; p++) {
+	    if (((*p)&0xC0) != 0x80) len++;
+	}
+    }
+    else
+#endif
     len = rb_enc_strlen(p, e, enc);
     if (len < 0) {
 	rb_raise(rb_eArgError, "invalid mbstring sequence");

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]