ruby-changes:4241

naruse	2008-03-08 18:05:34 +0900 (Sat, 08 Mar 2008)

  New Revision: 15731

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_m17n.rb
    trunk/version.h

  Log:
    * string.c (search_nonascii): Use VALUE instead of unsigned long,
      because VALUE is pointer-sized and can be the fastest unsigned
      integer type. On LLP64 platforms, where unsigned long is only
      32 bits wide, unsigned long isn't the fastest.
    * string.c (str_strlen): ditto.
    * string.c (str_utf8_nth): ditto.
    * string.c (count_utf8_lead_bytes_with_ulong): ditto.
    
    * string.c (count_utf8_lead_bytes_with_word): renamed from
      count_utf8_lead_bytes_with_ulong.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=15731&r2=15730&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15731&r2=15730&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15731&r2=15730&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=15731&r2=15730&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15730)
+++ ChangeLog	(revision 15731)
@@ -1,3 +1,14 @@
+Sat Mar  8 06:53:48 2008  NARUSE, Yui  <naruse@r...>
+
+	* string.c (search_nonascii): Use VALUE instead of unsigned long
+	  because VALUE can be the fastest unsigned integer type.
+	  On LLP64 unsigned long isn't the fastest.
+	* string.c (str_strlen): ditto.
+	* string.c (str_utf8_nth): ditto.
+	* string.c (count_utf8_lead_bytes_with_ulong): ditto.
+
+	* string.c (count_utf8_lead_bytes_with_word): renamed.
+
 Fri Mar  7 21:27:43 2008  Yusuke Endoh  <mame@t...>
 
 	* bignum.c: fix indent.
Index: string.c
===================================================================
--- string.c	(revision 15730)
+++ string.c	(revision 15731)
@@ -118,22 +118,22 @@
 static inline const char *
 search_nonascii(const char *p, const char *e)
 {
-#if ULONG_MAX == 18446744073709551615UL
-# define NONASCII_MASK 0x8080808080808080UL
-#elif ULONG_MAX == 4294967295UL
+#if SIZEOF_VALUE == 8
+# define NONASCII_MASK 0x8080808080808080LL
+#elif SIZEOF_VALUE == 4
 # define NONASCII_MASK 0x80808080UL
 #endif
 #ifdef NONASCII_MASK
-    if (sizeof(long) * 2 < e - p) {
-        const unsigned long *s, *t;
-        const VALUE lowbits = sizeof(unsigned long) - 1;
-        s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+    if (sizeof(VALUE) * 2 < e - p) {
+        const VALUE *s, *t;
+        const VALUE lowbits = sizeof(VALUE) - 1;
+        s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
         while (p < (const char *)s) {
             if (!ISASCII(*p))
                 return p;
             p++;
         }
-        t = (const unsigned long*)(~lowbits & (VALUE)e);
+        t = (const VALUE*)(~lowbits & (VALUE)e);
         while (s < t) {
             if (*s & NONASCII_MASK) {
                 t = s;
@@ -757,19 +757,19 @@
 
 #ifdef NONASCII_MASK
 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
-static inline const long
-count_utf8_lead_bytes_with_ulong(const unsigned long *s)
+static inline const VALUE
+count_utf8_lead_bytes_with_word(const VALUE *s)
 {
-    unsigned long d = *s;
+    VALUE d = *s;
     d |= ~(d>>1);
     d >>= 6;
     d &= NONASCII_MASK >> 7;
     d += (d>>8);
     d += (d>>16);
-#if NONASCII_MASK == 0x8080808080808080UL
+#if SIZEOF_VALUE == 8
     d += (d>>32);
 #endif
-    return (long)(d&0xF);
+    return (d&0xF);
 }
 #endif
 
@@ -786,18 +786,18 @@
 #ifdef NONASCII_MASK
     if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
         enc == rb_utf8_encoding()) {
-        long len = 0;
-	if (sizeof(long) * 2 < e - p) {
-	    const unsigned long *s, *t;
-	    const VALUE lowbits = sizeof(unsigned long) - 1;
-	    s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-	    t = (const unsigned long*)(~lowbits & (VALUE)e);
+        VALUE len = 0;
+	if (sizeof(VALUE) * 2 < e - p) {
+	    const VALUE *s, *t;
+	    const VALUE lowbits = sizeof(VALUE) - 1;
+	    s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
+	    t = (const VALUE*)(~lowbits & (VALUE)e);
 	    while (p < (const char *)s) {
 		if (is_utf8_lead_byte(*p)) len++;
 		p++;
 	    }
 	    while (s < t) {
-		len += count_utf8_lead_bytes_with_ulong(s);
+		len += count_utf8_lead_bytes_with_word(s);
 		s++;
 	    }
 	    p = (const char *)s;
@@ -806,7 +806,7 @@
 	    if (is_utf8_lead_byte(*p)) len++;
 	    p++;
 	}
-	return len;
+	return (long)len;
     }
 #endif
     n = rb_enc_strlen_cr(p, e, enc, &cr);
@@ -1168,29 +1168,27 @@
 static char *
 str_utf8_nth(const char *p, const char *e, int nth)
 {
-    if (sizeof(long) * 2 < nth) {
-	const unsigned long *s, *t;
-	const VALUE lowbits = sizeof(unsigned long) - 1;
-	s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-	t = (const unsigned long*)(~lowbits & (VALUE)e);
+    if (sizeof(VALUE) * 2 < nth) {
+	const VALUE *s, *t;
+	const VALUE lowbits = sizeof(VALUE) - 1;
+	s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
+	t = (const VALUE*)(~lowbits & (VALUE)e);
 	while (p < (const char *)s) {
 	    if (is_utf8_lead_byte(*p)) nth--;
 	    p++;
 	}
 	do {
-	    nth -= count_utf8_lead_bytes_with_ulong(s);
+	    nth -= count_utf8_lead_bytes_with_word(s);
 	    s++;
-	} while (s < t && sizeof(long) <= nth);
+	} while (s < t && sizeof(VALUE) <= nth);
 	p = (char *)s;
     }
-    if (0 < nth) {
-	while (p < e) {
-	    if (is_utf8_lead_byte(*p)) {
-		nth--;
-		if (nth < 0) break;
-	    }
-	    p++;
+    while (p < e) {
+	if (is_utf8_lead_byte(*p)) {
+	    if (nth == 0) break;
+	    nth--;
 	}
+	p++;
     }
     return (char *)p;
 }
Index: version.h
===================================================================
--- version.h	(revision 15730)
+++ version.h	(revision 15731)
@@ -1,7 +1,7 @@
 #define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2008-03-07"
+#define RUBY_RELEASE_DATE "2008-03-08"
 #define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20080307
+#define RUBY_RELEASE_CODE 20080308
 #define RUBY_PATCHLEVEL 0
 
 #define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
 #define RUBY_VERSION_TEENY 0
 #define RUBY_RELEASE_YEAR 2008
 #define RUBY_RELEASE_MONTH 3
-#define RUBY_RELEASE_DAY 7
+#define RUBY_RELEASE_DAY 8
 
 #ifdef RUBY_EXTERN
 RUBY_EXTERN const char ruby_version[];
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb	(revision 15730)
+++ test/ruby/test_m17n.rb	(revision 15731)
@@ -819,6 +819,8 @@
     assert_equal("\u{3044}", s[27])
     assert_equal("\u{3046}", s[28])
     assert_equal("\u{3048}", s[29])
+    s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
+    assert_equal("\u{3044}", s[24])
   end
 
   def test_str_aref_len

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/