ruby-changes:4241
From: ko1@a...
Date: Sat, 8 Mar 2008 18:05:56 +0900 (JST)
Subject: [ruby-changes:4241] naruse - Ruby:r15731 (trunk): * string.c (search_nonascii): Use VALUE instead of unsigned long
naruse 2008-03-08 18:05:34 +0900 (Sat, 08 Mar 2008) New Revision: 15731 Modified files: trunk/ChangeLog trunk/string.c trunk/test/ruby/test_m17n.rb trunk/version.h Log: * string.c (search_nonascii): Use VALUE instead of unsigned long because VALUE can be the fastest unsigned integer type. On LLP64 unsigned long isn't the fastest. * string.c (str_strlen): ditto. * string.c (str_utf8_nth): ditto. * string.c (count_utf8_lead_bytes_with_ulong): ditto. * string.c (count_utf8_lead_bytes_with_word): renamed. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/version.h?r1=15731&r2=15730&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15731&r2=15730&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15731&r2=15730&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=15731&r2=15730&diff_format=u Index: ChangeLog =================================================================== --- ChangeLog (revision 15730) +++ ChangeLog (revision 15731) @@ -1,3 +1,14 @@ +Sat Mar 8 06:53:48 2008 NARUSE, Yui <naruse@r...> + + * string.c (search_nonascii): Use VALUE instead of unsigned long + because VALUE can be the fastest unsigned integer type. + On LLP64 unsigned long isn't the fastest. + * string.c (str_strlen): ditto. + * string.c (str_utf8_nth): ditto. + * string.c (count_utf8_lead_bytes_with_ulong): ditto. + + * string.c (count_utf8_lead_bytes_with_word): renamed. + Fri Mar 7 21:27:43 2008 Yusuke Endoh <mame@t...> * bignum.c: fix indent. Index: string.c =================================================================== --- string.c (revision 15730) +++ string.c (revision 15731) @@ -118,22 +118,22 @@ static inline const char * search_nonascii(const char *p, const char *e) { -#if ULONG_MAX == 18446744073709551615UL -# define NONASCII_MASK 0x8080808080808080UL -#elif ULONG_MAX == 4294967295UL +#if SIZEOF_VALUE == 8 +# define NONASCII_MASK 0x8080808080808080LL +#elif SIZEOF_VALUE == 4 # define NONASCII_MASK 0x80808080UL #endif #ifdef NONASCII_MASK - if (sizeof(long) * 2 < e - p) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); while (p < (const char *)s) { if (!ISASCII(*p)) return p; p++; } - t = (const unsigned long*)(~lowbits & (VALUE)e); + t = (const VALUE*)(~lowbits & (VALUE)e); while (s < t) { if (*s & NONASCII_MASK) { t = s; @@ -757,19 +757,19 @@ #ifdef NONASCII_MASK #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) -static inline const long -count_utf8_lead_bytes_with_ulong(const unsigned long *s) +static inline const VALUE +count_utf8_lead_bytes_with_word(const VALUE *s) { - unsigned long d = *s; + VALUE d = *s; d |= ~(d>>1); d >>= 6; d &= NONASCII_MASK >> 7; d += (d>>8); d += (d>>16); -#if NONASCII_MASK == 0x8080808080808080UL +#if SIZEOF_VALUE == 8 d += (d>>32); #endif - return (long)(d&0xF); + return (d&0xF); } #endif @@ -786,18 +786,18 @@ #ifdef NONASCII_MASK if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && enc == rb_utf8_encoding()) { - long len = 0; - if (sizeof(long) * 2 < e - p) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); + VALUE len = 0; + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); while (p < (const char *)s) { if (is_utf8_lead_byte(*p)) len++; p++; } while (s < t) { - len += count_utf8_lead_bytes_with_ulong(s); + len += count_utf8_lead_bytes_with_word(s); s++; } p = (const char *)s; @@ -806,7 +806,7 @@ if (is_utf8_lead_byte(*p)) len++; p++; } - return len; + return (long)len; } #endif n = rb_enc_strlen_cr(p, e, enc, &cr); @@ -1168,29 +1168,27 @@ static char * str_utf8_nth(const char *p, const char *e, int nth) { - if (sizeof(long) * 2 < nth) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); + if (sizeof(VALUE) * 2 < nth) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); while (p < (const char *)s) { if (is_utf8_lead_byte(*p)) nth--; p++; } do { - nth -= count_utf8_lead_bytes_with_ulong(s); + nth -= count_utf8_lead_bytes_with_word(s); s++; - } while (s < t && sizeof(long) <= nth); + } while (s < t && sizeof(VALUE) <= nth); p = (char *)s; } - if (0 < nth) { - while (p < e) { - if (is_utf8_lead_byte(*p)) { - nth--; - if (nth < 0) break; - } - p++; + while (p < e) { + if (is_utf8_lead_byte(*p)) { + if (nth == 0) break; + nth--; } + p++; } return (char *)p; } Index: version.h =================================================================== --- version.h (revision 15730) +++ version.h (revision 15731) @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-03-07" +#define RUBY_RELEASE_DATE "2008-03-08" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080307 +#define RUBY_RELEASE_CODE 20080308 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 3 -#define RUBY_RELEASE_DAY 7 +#define RUBY_RELEASE_DAY 8 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 15730) +++ test/ruby/test_m17n.rb (revision 15731) @@ -819,6 +819,8 @@ assert_equal("\u{3044}", s[27]) assert_equal("\u{3046}", s[28]) assert_equal("\u{3048}", s[29]) + s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}" + assert_equal("\u{3044}", s[24]) end def test_str_aref_len -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/