ruby-changes:4060
From: ko1@a...
Date: Tue, 19 Feb 2008 21:18:18 +0900 (JST)
Subject: [ruby-changes:4060] naruse - Ruby:r15550 (trunk): * string.c (rb_enc_strlen_cr): get length with coderange scan.
naruse 2008-02-19 21:18:03 +0900 (Tue, 19 Feb 2008)

  New Revision: 15550

  Modified files:
    trunk/ChangeLog
    trunk/string.c

  Log:
    * string.c (rb_enc_strlen_cr): get length with coderange scan.

    * string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15550&r2=15549&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15550&r2=15549&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog (revision 15549)
+++ ChangeLog (revision 15550)
@@ -1,3 +1,9 @@
+Tue Feb 19 21:11:49 2008 NARUSE, Yui <naruse@r...>
+
+        * string.c (rb_enc_strlen_cr): get length with coderange scan.
+
+        * string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]
+
 Tue Feb 19 20:49:49 2008 Nobuyoshi Nakada <nobu@r...>
 
         * eval.c (rb_raise_jump): moved adjustment for control frame.
Index: string.c
===================================================================
--- string.c (revision 15549)
+++ string.c (revision 15550)
@@ -128,12 +128,12 @@
     const unsigned long *s, *t;
     const VALUE lowbits = sizeof(unsigned long) - 1;
     s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
-    t = (const unsigned long*)(~lowbits & (VALUE)e);
     while (p < (const char *)s) {
         if (!ISASCII(*p))
             return p;
         p++;
     }
+    t = (const unsigned long*)(~lowbits & (VALUE)e);
     while (s < t) {
         if (*s & NONASCII_MASK) {
             t = s;
@@ -619,10 +619,63 @@
     return c;
 }
 
+long
+rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
+{
+    long c;
+    const char *q;
+    int ret;
+
+    *cr = 0;
+    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
+        return (e - p) / rb_enc_mbminlen(enc);
+    }
+    else if (rb_enc_asciicompat(enc)) {
+        c = 0;
+        while (p < e) {
+            if (ISASCII(*p)) {
+                q = search_nonascii(p, e);
+                if (!q) {
+                    return c + (e - p);
+                }
+                c += q - p;
+                p = q;
+            }
+            ret = rb_enc_precise_mbclen(p, e, enc);
+            if (MBCLEN_CHARFOUND_P(ret)) {
+                *cr |= ENC_CODERANGE_VALID;
+                p += MBCLEN_CHARFOUND_LEN(ret);
+            }
+            else {
+                *cr = ENC_CODERANGE_BROKEN;
+                p++;
+            }
+            c++;
+        }
+        if (!*cr) *cr = ENC_CODERANGE_7BIT;
+        return c;
+    }
+
+    for (c=0; p<e; c++) {
+        ret = rb_enc_precise_mbclen(p, e, enc);
+        if (MBCLEN_CHARFOUND_P(ret)) {
+            *cr |= ENC_CODERANGE_VALID;
+            p += MBCLEN_CHARFOUND_LEN(ret);
+        }
+        else {
+            *cr = ENC_CODERANGE_BROKEN;
+            p++;
+        }
+    }
+    if (!*cr) *cr = ENC_CODERANGE_7BIT;
+    return c;
+}
+
 static long
 str_strlen(VALUE str, rb_encoding *enc)
 {
     const char *p, *e;
+    int n, cr;
 
     if (single_byte_optimizable(str)) return RSTRING_LEN(str);
     if (!enc) enc = STR_ENC_GET(str);
@@ -661,7 +714,11 @@
         return len;
     }
 #endif
-    return rb_enc_strlen(p, e, enc);
+    n = rb_enc_strlen_cr(p, e, enc, &cr);
+    if (cr) {
+        ENC_CODERANGE_SET(str, cr);
+    }
+    return n;
 }
 
 /*

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/