[前][次][番号順一覧][スレッド一覧]

ruby-changes:3640

From: ko1@a...
Date: Sat, 19 Jan 2008 22:43:03 +0900 (JST)
Subject: [ruby-changes:3640] akr - Ruby:r15129 (trunk): * string.c (coderange_scan): don't call mbclen functions for ASCII

akr	2008-01-19 22:42:50 +0900 (Sat, 19 Jan 2008)

  New Revision: 15129

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (coderange_scan): don't call mbclen functions for ASCII
      characters with ASCII compatible encoding.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15129&r2=15128&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15129&r2=15128&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15128)
+++ ChangeLog	(revision 15129)
@@ -1,3 +1,8 @@
+Sat Jan 19 22:41:39 2008  Tanaka Akira  <akr@f...>
+
+	* string.c (coderange_scan): don't call mbclen functions for ASCII
+	  characters with ASCII compatible encoding.
+
 Sat Jan 19 21:00:34 2008  Tanaka Akira  <akr@f...>
 
 	* lib/rdoc/template.rb (RDoc): defined to avoid uninitialized constant
Index: string.c
===================================================================
--- string.c	(revision 15128)
+++ string.c	(revision 15129)
@@ -115,40 +115,92 @@
 
 VALUE rb_fs;
 
+static inline const char *
+search_nonascii(const char *p, const char *e)
+{
+#if ULONG_MAX == 18446744073709551615UL
+# define NONASCII_MASK 0x8080808080808080UL
+#elif ULONG_MAX == 4294967295UL
+# define NONASCII_MASK 0x80808080UL
+#endif
+#ifdef NONASCII_MASK
+    if (sizeof(long) * 2 < e - p) {
+        const unsigned long *s, *t;
+        const VALUE lowbits = sizeof(unsigned long) - 1;
+        s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+        t = (const unsigned long*)(~lowbits & (VALUE)e);
+        while (p < (const char *)s) {
+            if (!ISASCII(*p))
+                return p;
+            p++;
+        }
+        while (s < t) {
+            if (*s & NONASCII_MASK) {
+                t = s;
+                break;
+            }
+            s++;
+        }
+        p = (const char *)t;
+    }
+#endif
+    while (p < e) {
+        if (!ISASCII(*p))
+            return p;
+        p++;
+    }
+    return NULL;
+}
+
 static int
 coderange_scan(const char *p, long len, rb_encoding *enc)
 {
     const char *e = p + len;
-    int cr;
 
     if (rb_enc_to_index(enc) == 0) {
         /* enc is ASCII-8BIT.  ASCII-8BIT string never be broken. */
+        p = search_nonascii(p, e);
+        return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
+    }
+
+    if (rb_enc_asciicompat(enc)) {
+        p = search_nonascii(p, e);
+        if (!p) {
+            return ENC_CODERANGE_7BIT;
+        }
         while (p < e) {
-            if (!ISASCII((unsigned char)*p)) {
-                return ENC_CODERANGE_VALID;
+            int ret = rb_enc_precise_mbclen(p, e, enc);
+            int len = MBCLEN_CHARFOUND(ret);
+            if (!len) {
+                return ENC_CODERANGE_BROKEN;
             }
-            p++;
+            p += len;
+            if (p < e) {
+                p = search_nonascii(p, e);
+                if (!p) {
+                    return ENC_CODERANGE_VALID;
+                }
+            }
         }
-        return ENC_CODERANGE_7BIT;
+        if (e < p) {
+            return ENC_CODERANGE_BROKEN;
+        }
+        return ENC_CODERANGE_VALID;
     }
 
-    cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
     while (p < e) {
         int ret = rb_enc_precise_mbclen(p, e, enc);
         int len = MBCLEN_CHARFOUND(ret);
 
-        if (len) {
-            if (len != 1 || !ISASCII((unsigned char)*p)) {
-                cr = ENC_CODERANGE_VALID;
-            }
-            p += len;
+        if (!len) {
+            return ENC_CODERANGE_BROKEN;
         }
-        else {
-            cr = ENC_CODERANGE_BROKEN;
-            break;
-        }
+        p += len;
     }
-    return cr;
+    if (e < p) {
+        return ENC_CODERANGE_BROKEN;
+    }
+    return ENC_CODERANGE_VALID;
 }
 
 int

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]