ruby-changes:4205
From: ko1@a...
Date: Wed, 5 Mar 2008 22:55:24 +0900 (JST)
Subject: [ruby-changes:4205] naruse - Ruby:r15695 (trunk): * string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
naruse 2008-03-05 22:54:36 +0900 (Wed, 05 Mar 2008)
New Revision: 15695
Modified files:
trunk/ChangeLog
trunk/string.c
Log:
* string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
defined for UTF-8 optimization.
* string.c (str_strlen): use is_utf8_lead_byte and
count_utf8_lead_bytes_with_ulong.
* string.c (str_utf8_nth): ditto.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15695&r2=15694&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15695&r2=15694&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15694)
+++ ChangeLog (revision 15695)
@@ -1,3 +1,13 @@
+Wed Mar 05 22:49:20 2008 NARUSE, Yui <naruse@r...>
+
+ * string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong):
+ defined for UTF-8 optimization.
+
+ * string.c (str_strlen): use is_utf8_lead_byte and
+ count_utf8_lead_bytes_with_ulong.
+
+ * string.c (str_utf8_nth) ditto.
+
Wed Mar 5 17:53:01 2008 Nobuyoshi Nakada <nobu@r...>
* file.c (rb_file_flock): returns false on EAGAIN if non-blocking.
Index: string.c
===================================================================
--- string.c (revision 15694)
+++ string.c (revision 15695)
@@ -755,6 +755,24 @@
return c;
}
+#ifdef NONASCII_MASK
+#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
+static inline const long
+count_utf8_lead_bytes_with_ulong(const unsigned long *s)
+{
+ unsigned long d = *s;
+ d |= ~(d>>1);
+ d >>= 6;
+ d &= NONASCII_MASK >> 3;
+ d += (d>>8);
+ d += (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+ d += (d>>32);
+#endif
+ return (long)(d&0xF);
+}
+#endif
+
static long
str_strlen(VALUE str, rb_encoding *enc)
{
@@ -774,26 +792,19 @@
const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e);
- for (len=0; p<(const char *)s; p++) {
- if (((*p)&0xC0) != 0x80) len++;
+ while (p < (const char *)s) {
+ if (is_utf8_lead_byte(*p)) len++;
+ p++;
}
while (s < t) {
- unsigned long d = *s;
- d = ~d | (d<<1);
- d &= NONASCII_MASK;
- d >>= 7;
- d += (d>>8);
- d += (d>>16);
-#if NONASCII_MASK == 0x8080808080808080UL
- d = d + (d>>32);
-#endif
- len += (long)(d&0xF);
+ len += count_utf8_lead_bytes_with_ulong(s);
s++;
}
- p = (const char *)t;
+ p = (const char *)s;
}
- for (; p<e; p++) {
- if (((*p)&0xC0) != 0x80) len++;
+ while (p < e) {
+ if (is_utf8_lead_byte(*p)) len++;
+ p++;
}
return len;
}
@@ -1162,33 +1173,22 @@
const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
t = (const unsigned long*)(~lowbits & (VALUE)e);
- for (; p<(const char *)s && 0<nth; p++) {
- if (((*p)&0xC0) != 0x80) nth--;
+ while (p < (const char *)s) {
+ if (is_utf8_lead_byte(*p)) nth--;
+ p++;
}
while (s < t) {
- unsigned long d = *s++;
- d = ~d | (d<<1);
- d &= NONASCII_MASK;
- d >>= 7;
- d += (d>>8);
- d += (d>>16);
-#if NONASCII_MASK == 0x8080808080808080UL
- d += (d>>32);
-#endif
- nth -= (long)(d&0xF);
- if (nth < 8) {
- t = s;
- break;
- }
+ nth -= count_utf8_lead_bytes_with_ulong(s);
+ if (nth < sizeof(long)) break;
+ s++;
}
- p = (char *)t;
+ p = (char *)s;
}
if (0 < nth) {
while (p < e) {
- if (((*p)&0xC0) != 0x80) {
+ if (is_utf8_lead_byte(*p)) {
nth--;
- if (nth < 0)
- break;
+ if (nth < 0) break;
}
p++;
}
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/