ruby-changes:3640
From: ko1@a...
Date: Sat, 19 Jan 2008 22:43:03 +0900 (JST)
Subject: [ruby-changes:3640] akr - Ruby:r15129 (trunk): * string.c (coderange_scan): don't call mbclen functions for ASCII
akr 2008-01-19 22:42:50 +0900 (Sat, 19 Jan 2008) New Revision: 15129 Modified files: trunk/ChangeLog trunk/string.c Log: * string.c (coderange_scan): don't call mbclen functions for ASCII characters with ASCII compatible encoding. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15129&r2=15128&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15129&r2=15128&diff_format=u Index: ChangeLog =================================================================== --- ChangeLog (revision 15128) +++ ChangeLog (revision 15129) @@ -1,3 +1,8 @@ +Sat Jan 19 22:41:39 2008 Tanaka Akira <akr@f...> + + * string.c (coderange_scan): don't call mbclen functions for ASCII + characters with ASCII compatible encoding. + Sat Jan 19 21:00:34 2008 Tanaka Akira <akr@f...> * lib/rdoc/template.rb (RDoc): defined to avoid uninitialized constant Index: string.c =================================================================== --- string.c (revision 15128) +++ string.c (revision 15129) @@ -115,40 +115,92 @@ VALUE rb_fs; +static inline const char * +search_nonascii(const char *p, const char *e) +{ +#if ULONG_MAX == 18446744073709551615UL +# define NONASCII_MASK 0x8080808080808080UL +#elif ULONG_MAX == 4294967295UL +# define NONASCII_MASK 0x80808080UL +#endif +#ifdef NONASCII_MASK + if (sizeof(long) * 2 < e - p) { + const unsigned long *s, *t; + const VALUE lowbits = sizeof(unsigned long) - 1; + s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); + t = (const unsigned long*)(~lowbits & (VALUE)e); + while (p < (const char *)s) { + if (!ISASCII(*p)) + return p; + p++; + } + while (s < t) { + if (*s & NONASCII_MASK) { + t = s; + break; + } + s++; + } + p = (const char *)t; + } +#endif + while (p < e) { + if (!ISASCII(*p)) + return p; + p++; + } + return NULL; +} + static int coderange_scan(const char *p, long len, rb_encoding *enc) { const char *e = p + len; - int cr; if (rb_enc_to_index(enc) == 0) { /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT; + } + + if (rb_enc_asciicompat(enc)) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_7BIT; + } while (p < e) { - if (!ISASCII((unsigned char)*p)) { - return ENC_CODERANGE_VALID; + int ret = rb_enc_precise_mbclen(p, e, enc); + int len = MBCLEN_CHARFOUND(ret); + if (!len) { + return ENC_CODERANGE_BROKEN; } - p++; + p += len; + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_VALID; + } + } } - return ENC_CODERANGE_7BIT; + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; } - cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; while (p < e) { int ret = rb_enc_precise_mbclen(p, e, enc); int len = MBCLEN_CHARFOUND(ret); - if (len) { - if (len != 1 || !ISASCII((unsigned char)*p)) { - cr = ENC_CODERANGE_VALID; - } - p += len; + if (!len) { + return ENC_CODERANGE_BROKEN; } - else { - cr = ENC_CODERANGE_BROKEN; - break; - } + p += len; } - return cr; + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; } int -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/