[前][次][番号順一覧][スレッド一覧]

ruby-changes:4021

From: ko1@a...
Date: Sat, 16 Feb 2008 20:53:19 +0900 (JST)
Subject: [ruby-changes:4021] naruse - Ruby:r15511 (trunk): * string.c (rb_str_substr): optimized for UTF-8.

naruse	2008-02-16 20:53:04 +0900 (Sat, 16 Feb 2008)

  New Revision: 15511

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_str_substr): optimized for UTF-8.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15511&r2=15510&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15511&r2=15510&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15510)
+++ ChangeLog	(revision 15511)
@@ -1,3 +1,7 @@
+Sat Feb 16 20:49:34 2008  NARUSE, Yui  <naruse@r...>
+
+	* string.c (rb_str_substr): optimized for UTF-8.
+
 Sat Feb 16 18:13:53 2008  Tanaka Akira  <akr@f...>
 
 	* encoding.c (rb_enc_compatible): check encoding incapable arguments.
@@ -12,7 +16,7 @@
 
 Sat Feb 16 18:25:14 2008  NARUSE, Yui  <naruse@r...>
 
-	* string.c (str_strlen): little more optimize.
+	* string.c (str_strlen): little more optimization.
 	  (rb_enc_nth): remove needless variable 'c'.
 
 Sat Feb 16 18:00:13 2008  Tanaka Akira  <akr@f...>
Index: string.c
===================================================================
--- string.c	(revision 15510)
+++ string.c	(revision 15511)
@@ -1011,6 +1011,58 @@
     return pp - p;
 }
 
+#ifdef NONASCII_MASK
+static char *
+str_utf8_nth(const char *p, const char *e, int nth)
+{
+    if (sizeof(long) * 2 < nth) {
+	const unsigned long *s, *t;
+	const VALUE lowbits = sizeof(unsigned long) - 1;
+	s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+	t = (const unsigned long*)(~lowbits & (VALUE)e);
+	for (; p<(const char *)s && 0<nth; p++) {
+	    if (((*p)&0xC0) != 0x80) nth--;
+	}
+	while (s < t) {
+	    unsigned long d = *s++;
+	    d = ~d | (d<<1);
+	    d &= NONASCII_MASK;
+	    d >>= 7;
+	    d += (d>>8);
+	    d += (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+	    d += (d>>32);
+#endif
+	    nth -= (long)(d&0xF);
+	    if (nth < 8) {
+		t = s;
+		break;
+	    }
+	}
+	p = (char *)t;
+    }
+    if (0 < nth) {
+	while (p < e) {
+	    if (((*p)&0xC0) != 0x80) {
+		nth--;
+		if (nth < 0)
+		    break;
+	    }
+	    p++;
+	}
+    }
+    return (char *)p;
+}
+
+static int
+str_utf8_offset(const char *p, const char *e, int nth)
+{
+    const char *pp = str_utf8_nth(p, e, nth);
+    if (!pp) return e - p;
+    return pp - p;
+}
+#endif
+
 static long
 str_sublen(VALUE str, long pos, rb_encoding *enc)
 {
@@ -1082,6 +1134,13 @@
     if (len == 0) {
 	p = 0;
     }
+#ifdef NONASCII_MASK
+    else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+        enc == rb_utf8_encoding()) {
+        p = str_utf8_nth(s, e, beg);
+        len = str_utf8_offset(p, e, len);
+    }
+#endif
     else if ((p = str_nth(s, e, beg, enc, singlebyte)) == e) {
 	len = 0;
     }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]