ruby-changes:4021
From: ko1@a...
Date: Sat, 16 Feb 2008 20:53:19 +0900 (JST)
Subject: [ruby-changes:4021] naruse - Ruby:r15511 (trunk): * string.c (rb_str_substr): optimized for UTF-8.
naruse	2008-02-16 20:53:04 +0900 (Sat, 16 Feb 2008)

  New Revision: 15511

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_str_substr): optimized for UTF-8.

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15511&r2=15510&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15511&r2=15510&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15510)
+++ ChangeLog	(revision 15511)
@@ -1,3 +1,7 @@
+Sat Feb 16 20:49:34 2008  NARUSE, Yui  <naruse@r...>
+
+	* string.c (rb_str_substr): optimized for UTF-8.
+
 Sat Feb 16 18:13:53 2008  Tanaka Akira  <akr@f...>
 
 	* encoding.c (rb_enc_compatible): check encoding incapable arguments.
@@ -12,7 +16,7 @@
 
 Sat Feb 16 18:25:14 2008  NARUSE, Yui  <naruse@r...>
 
-	* string.c (str_strlen): little more optimize.
+	* string.c (str_strlen): little more optimization.
 	  (rb_enc_nth): remove needless variable 'c'.
 
 Sat Feb 16 18:00:13 2008  Tanaka Akira  <akr@f...>
Index: string.c
===================================================================
--- string.c	(revision 15510)
+++ string.c	(revision 15511)
@@ -1011,6 +1011,58 @@
     return pp - p;
 }
 
+#ifdef NONASCII_MASK
+static char *
+str_utf8_nth(const char *p, const char *e, int nth)
+{
+    if (sizeof(long) * 2 < nth) {
+        const unsigned long *s, *t;
+        const VALUE lowbits = sizeof(unsigned long) - 1;
+        s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+        t = (const unsigned long*)(~lowbits & (VALUE)e);
+        for (; p<(const char *)s && 0<nth; p++) {
+            if (((*p)&0xC0) != 0x80) nth--;
+        }
+        while (s < t) {
+            unsigned long d = *s++;
+            d = ~d | (d<<1);
+            d &= NONASCII_MASK;
+            d >>= 7;
+            d += (d>>8);
+            d += (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+            d += (d>>32);
+#endif
+            nth -= (long)(d&0xF);
+            if (nth < 8) {
+                t = s;
+                break;
+            }
+        }
+        p = (char *)t;
+    }
+    if (0 < nth) {
+        while (p < e) {
+            if (((*p)&0xC0) != 0x80) {
+                nth--;
+                if (nth < 0)
+                    break;
+            }
+            p++;
+        }
+    }
+    return (char *)p;
+}
+
+static int
+str_utf8_offset(const char *p, const char *e, int nth)
+{
+    const char *pp = str_utf8_nth(p, e, nth);
+    if (!pp) return e - p;
+    return pp - p;
+}
+#endif
+
 static long
 str_sublen(VALUE str, long pos, rb_encoding *enc)
 {
@@ -1082,6 +1134,13 @@
     if (len == 0) {
         p = 0;
     }
+#ifdef NONASCII_MASK
+    else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+             enc == rb_utf8_encoding()) {
+        p = str_utf8_nth(s, e, beg);
+        len = str_utf8_offset(p, e, len);
+    }
+#endif
     else if ((p = str_nth(s, e, beg, enc, singlebyte)) == e) {
         len = 0;
     }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/