ruby-changes:4060
From: ko1@a...
Date: Tue, 19 Feb 2008 21:18:18 +0900 (JST)
Subject: [ruby-changes:4060] naruse - Ruby:r15550 (trunk): * string.c (rb_enc_strlen_cr): get length with coderange scan.
naruse 2008-02-19 21:18:03 +0900 (Tue, 19 Feb 2008)
New Revision: 15550
Modified files:
trunk/ChangeLog
trunk/string.c
Log:
* string.c (rb_enc_strlen_cr): get length with coderange scan.
* string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15550&r2=15549&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15550&r2=15549&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15549)
+++ ChangeLog (revision 15550)
@@ -1,3 +1,9 @@
+Tue Feb 19 21:11:49 2008 NARUSE, Yui <naruse@r...>
+
+ * string.c (rb_enc_strlen_cr): get length with coderange scan.
+
+ * string.c (str_strlen): use rb_enc_strlen_cr. [ruby-dev:33849]
+
Tue Feb 19 20:49:49 2008 Nobuyoshi Nakada <nobu@r...>
* eval.c (rb_raise_jump): moved adjustment for control frame.
Index: string.c
===================================================================
--- string.c (revision 15549)
+++ string.c (revision 15550)
@@ -128,12 +128,12 @@
const unsigned long *s, *t;
const VALUE lowbits = sizeof(unsigned long) - 1;
s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
- t = (const unsigned long*)(~lowbits & (VALUE)e);
while (p < (const char *)s) {
if (!ISASCII(*p))
return p;
p++;
}
+ t = (const unsigned long*)(~lowbits & (VALUE)e);
while (s < t) {
if (*s & NONASCII_MASK) {
t = s;
@@ -619,10 +619,63 @@
return c;
}
+/*
+ * Count the characters in the byte range [p, e) under encoding enc and
+ * report through *cr what the scan learned about the code range.
+ *
+ *   p, e  start and one-past-the-end of the bytes to measure
+ *   enc   encoding used to decode the bytes
+ *   cr    out parameter; always written
+ *
+ * Returns the number of characters.  A byte that does not start a
+ * complete character is counted as one character by itself.
+ *
+ * On return *cr is:
+ *   0                      fixed-width encoding; the bytes are not scanned
+ *   ENC_CODERANGE_7BIT     scan finished without meeting any multibyte
+ *                          or invalid sequence (including empty input)
+ *   ENC_CODERANGE_VALID    at least one multibyte character, none invalid
+ *   ENC_CODERANGE_BROKEN   an invalid sequence was met
+ *
+ * NOTE(review): keeping BROKEN after a later "|= ENC_CODERANGE_VALID"
+ * relies on BROKEN containing the VALID bits -- confirm in encoding.h.
+ */
+long
+rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
+{
+    long c;
+    const char *q;
+    int ret;
+
+    *cr = 0;
+    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
+        /* Fixed-width encoding: length follows from the byte count alone. */
+        return (e - p) / rb_enc_mbminlen(enc);
+    }
+    else if (rb_enc_asciicompat(enc)) {
+        c = 0;
+        while (p < e) {
+            if (ISASCII(*p)) {
+                /* Skip the whole ASCII run in one step. */
+                q = search_nonascii(p, e);
+                if (!q) {
+                    /*
+                     * Only ASCII remains.  If nothing else was seen the
+                     * string is 7bit; report it here as the loop-exit
+                     * path below does, so the caller can cache it.
+                     */
+                    if (!*cr) {
+                        *cr = ENC_CODERANGE_7BIT;
+                    }
+                    return c + (e - p);
+                }
+                c += q - p;
+                p = q;
+            }
+            ret = rb_enc_precise_mbclen(p, e, enc);
+            if (MBCLEN_CHARFOUND_P(ret)) {
+                *cr |= ENC_CODERANGE_VALID;
+                p += MBCLEN_CHARFOUND_LEN(ret);
+            }
+            else {
+                /* Invalid or truncated sequence: consume a single byte. */
+                *cr = ENC_CODERANGE_BROKEN;
+                p++;
+            }
+            c++;
+        }
+        if (!*cr) {
+            *cr = ENC_CODERANGE_7BIT;
+        }
+        return c;
+    }
+
+    /* Variable-width, not ASCII-compatible: decode one character at a time. */
+    for (c = 0; p < e; c++) {
+        ret = rb_enc_precise_mbclen(p, e, enc);
+        if (MBCLEN_CHARFOUND_P(ret)) {
+            *cr |= ENC_CODERANGE_VALID;
+            p += MBCLEN_CHARFOUND_LEN(ret);
+        }
+        else {
+            *cr = ENC_CODERANGE_BROKEN;
+            p++;
+        }
+    }
+    if (!*cr) {
+        *cr = ENC_CODERANGE_7BIT;
+    }
+    return c;
+}
+
static long
str_strlen(VALUE str, rb_encoding *enc)
{
const char *p, *e;
+ int n, cr;
if (single_byte_optimizable(str)) return RSTRING_LEN(str);
if (!enc) enc = STR_ENC_GET(str);
@@ -661,7 +714,11 @@
return len;
}
#endif
- return rb_enc_strlen(p, e, enc);
+ n = rb_enc_strlen_cr(p, e, enc, &cr);
+ if (cr) {
+ ENC_CODERANGE_SET(str, cr);
+ }
+ return n;
}
/*
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/