[前][次][番号順一覧][スレッド一覧]

ruby-changes:4060

From: ko1@a...
Date: Tue, 19 Feb 2008 21:18:18 +0900 (JST)
Subject: [ruby-changes:4060] naruse - Ruby:r15550 (trunk): * string.c (rb_enc_strlen_cr): get length with coderange scan.

naruse	2008-02-19 21:18:03 +0900 (Tue, 19 Feb 2008)

  New Revision: 15550

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_enc_strlen_cr): get length with coderange scan.
    
    * string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15550&r2=15549&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15550&r2=15549&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15549)
+++ ChangeLog	(revision 15550)
@@ -1,3 +1,9 @@
+Tue Feb 19 21:11:49 2008  NARUSE, Yui  <naruse@r...>
+
+	* string.c (rb_enc_strlen_cr): get length with coderange scan.
+
+	* string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]
+
 Tue Feb 19 20:49:49 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* eval.c (rb_raise_jump): moved adjustment for control frame.
Index: string.c
===================================================================
--- string.c	(revision 15549)
+++ string.c	(revision 15550)
@@ -128,12 +128,12 @@
         const unsigned long *s, *t;
         const VALUE lowbits = sizeof(unsigned long) - 1;
         s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-        t = (const unsigned long*)(~lowbits & (VALUE)e);
         while (p < (const char *)s) {
             if (!ISASCII(*p))
                 return p;
             p++;
         }
+        t = (const unsigned long*)(~lowbits & (VALUE)e);
         while (s < t) {
             if (*s & NONASCII_MASK) {
                 t = s;
@@ -619,10 +619,63 @@
     return c;
 }
 
+long
+rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
+{
+    long c;
+    const char *q;
+    int ret;
+
+    *cr = 0;
+    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
+	return (e - p) / rb_enc_mbminlen(enc);
+    }
+    else if (rb_enc_asciicompat(enc)) {
+	c = 0;
+	while (p < e) {
+	    if (ISASCII(*p)) {
+		q = search_nonascii(p, e);
+		if (!q) {
+		    return c + (e - p);
+		}
+		c += q - p;
+		p = q;
+	    }
+	    ret = rb_enc_precise_mbclen(p, e, enc);
+	    if (MBCLEN_CHARFOUND_P(ret)) {
+		*cr |= ENC_CODERANGE_VALID;
+		p += MBCLEN_CHARFOUND_LEN(ret);
+	    }
+	    else {
+		*cr = ENC_CODERANGE_BROKEN;
+		p++;
+	    }
+	    c++;
+	}
+	if (!*cr) *cr = ENC_CODERANGE_7BIT;
+	return c;
+    }
+
+    for (c=0; p<e; c++) {
+	ret = rb_enc_precise_mbclen(p, e, enc);
+	if (MBCLEN_CHARFOUND_P(ret)) {
+	    *cr |= ENC_CODERANGE_VALID;
+	    p += MBCLEN_CHARFOUND_LEN(ret);
+	}
+	else {
+	    *cr = ENC_CODERANGE_BROKEN;
+	    p++;
+	}
+    }
+    if (!*cr) *cr = ENC_CODERANGE_7BIT;
+    return c;
+}
+
 static long
 str_strlen(VALUE str, rb_encoding *enc)
 {
     const char *p, *e;
+    int n, cr;
 
     if (single_byte_optimizable(str)) return RSTRING_LEN(str);
     if (!enc) enc = STR_ENC_GET(str);
@@ -661,7 +714,11 @@
 	return len;
     }
 #endif
-    return rb_enc_strlen(p, e, enc);
+    n = rb_enc_strlen_cr(p, e, enc, &cr);
+    if (cr) {
+        ENC_CODERANGE_SET(str, cr);
+    }
+    return n;
 }
 
 /*

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]