ruby-changes:4014
From: ko1@a...
Date: Sat, 16 Feb 2008 16:16:53 +0900 (JST)
Subject: [ruby-changes:4014] akr - Ruby:r15504 (trunk): * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
akr 2008-02-16 16:16:36 +0900 (Sat, 16 Feb 2008)

  New Revision: 15504

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
      (str_strlen): UTF-8 character count is only applicable for valid
      UTF-8 string.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15504&r2=15503&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15504&r2=15503&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog (revision 15503)
+++ ChangeLog (revision 15504)
@@ -1,3 +1,9 @@
+Sat Feb 16 16:14:35 2008 Tanaka Akira <akr@f...>
+
+        * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen.
+          (str_strlen): UTF-8 character count is only applicable for valid
+          UTF-8 string.
+
 Sat Feb 16 13:16:49 2008 Tanaka Akira <akr@f...>
 
         * string.c (rb_str_sub_bang): stringize replacing hash values.
Index: string.c
===================================================================
--- string.c (revision 15503)
+++ string.c (revision 15504)
@@ -597,35 +597,7 @@
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
         return (e - p) / rb_enc_mbminlen(enc);
     }
-#ifdef NONASCII_MASK
-    else if (enc == rb_utf8_encoding()) {
-        if (sizeof(long) * 2 < e - p) {
-            const unsigned long *s, *t;
-            const VALUE lowbits = sizeof(unsigned long) - 1;
-            s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-            t = (const unsigned long*)(~lowbits & (VALUE)e);
-            for (c=0; p<(const char *)s; p++) {
-                if (((*p)&0xC0) != 0x80) c++;
-            }
-            while (s < t) {
-                unsigned long d = *s;
-                d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
-                d = (d>>7) + (d>>15);
-                d = d + (d>>16);
-#if NONASCII_MASK == 0x8080808080808080UL
-                d = d + (d>>32);
-#endif
-                c += (long)(d&0xF);
-                s++;
-            }
-            p = (const char *)t;
-        }
-        for (; p<e; p++) {
-            if (((*p)&0xC0) != 0x80) c++;
-        }
-        return c;
-    }
-#endif
+
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
         while (p < e) {
@@ -658,6 +630,37 @@
     if (!enc) enc = STR_ENC_GET(str);
     p = RSTRING_PTR(str);
     e = RSTRING_END(str);
+#ifdef NONASCII_MASK
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+        enc == rb_utf8_encoding()) {
+        len = 0;
+        if (sizeof(long) * 2 < e - p) {
+            const unsigned long *s, *t;
+            const VALUE lowbits = sizeof(unsigned long) - 1;
+            s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+            t = (const unsigned long*)(~lowbits & (VALUE)e);
+            for (len=0; p<(const char *)s; p++) {
+                if (((*p)&0xC0) != 0x80) len++;
+            }
+            while (s < t) {
+                unsigned long d = *s;
+                d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
+                d = (d>>7) + (d>>15);
+                d = d + (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+                d = d + (d>>32);
+#endif
+                len += (long)(d&0xF);
+                s++;
+            }
+            p = (const char *)t;
+        }
+        for (; p<e; p++) {
+            if (((*p)&0xC0) != 0x80) len++;
+        }
+    }
+    else
+#endif
     len = rb_enc_strlen(p, e, enc);
     if (len < 0) {
         rb_raise(rb_eArgError, "invalid mbstring sequence");

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/