ruby-changes:3640
From: ko1@a...
Date: Sat, 19 Jan 2008 22:43:03 +0900 (JST)
Subject: [ruby-changes:3640] akr - Ruby:r15129 (trunk): * string.c (coderange_scan): don't call mbclen functions for ASCII
akr 2008-01-19 22:42:50 +0900 (Sat, 19 Jan 2008)
New Revision: 15129
Modified files:
trunk/ChangeLog
trunk/string.c
Log:
* string.c (coderange_scan): don't call mbclen functions for ASCII
characters with ASCII compatible encoding.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15129&r2=15128&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15129&r2=15128&diff_format=u
Index: ChangeLog
===================================================================
--- ChangeLog (revision 15128)
+++ ChangeLog (revision 15129)
@@ -1,3 +1,8 @@
+Sat Jan 19 22:41:39 2008 Tanaka Akira <akr@f...>
+
+ * string.c (coderange_scan): don't call mbclen functions for ASCII
+ characters with ASCII compatible encoding.
+
Sat Jan 19 21:00:34 2008 Tanaka Akira <akr@f...>
* lib/rdoc/template.rb (RDoc): defined to avoid uninitialized constant
Index: string.c
===================================================================
--- string.c (revision 15128)
+++ string.c (revision 15129)
@@ -115,40 +115,92 @@
VALUE rb_fs;
+static inline const char *
+search_nonascii(const char *p, const char *e)
+{
+#if ULONG_MAX == 18446744073709551615UL
+# define NONASCII_MASK 0x8080808080808080UL
+#elif ULONG_MAX == 4294967295UL
+# define NONASCII_MASK 0x80808080UL
+#endif
+#ifdef NONASCII_MASK
+ if (sizeof(long) * 2 < e - p) {
+ const unsigned long *s, *t;
+ const VALUE lowbits = sizeof(unsigned long) - 1;
+ s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+ t = (const unsigned long*)(~lowbits & (VALUE)e);
+ while (p < (const char *)s) {
+ if (!ISASCII(*p))
+ return p;
+ p++;
+ }
+ while (s < t) {
+ if (*s & NONASCII_MASK) {
+ t = s;
+ break;
+ }
+ s++;
+ }
+ p = (const char *)t;
+ }
+#endif
+ while (p < e) {
+ if (!ISASCII(*p))
+ return p;
+ p++;
+ }
+ return NULL;
+}
+
static int
coderange_scan(const char *p, long len, rb_encoding *enc)
{
const char *e = p + len;
- int cr;
if (rb_enc_to_index(enc) == 0) {
/* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
+ p = search_nonascii(p, e);
+ return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
+ }
+
+ if (rb_enc_asciicompat(enc)) {
+ p = search_nonascii(p, e);
+ if (!p) {
+ return ENC_CODERANGE_7BIT;
+ }
while (p < e) {
- if (!ISASCII((unsigned char)*p)) {
- return ENC_CODERANGE_VALID;
+ int ret = rb_enc_precise_mbclen(p, e, enc);
+ int len = MBCLEN_CHARFOUND(ret);
+ if (!len) {
+ return ENC_CODERANGE_BROKEN;
}
- p++;
+ p += len;
+ if (p < e) {
+ p = search_nonascii(p, e);
+ if (!p) {
+ return ENC_CODERANGE_VALID;
+ }
+ }
}
- return ENC_CODERANGE_7BIT;
+ if (e < p) {
+ return ENC_CODERANGE_BROKEN;
+ }
+ return ENC_CODERANGE_VALID;
}
- cr = rb_enc_asciicompat(enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
while (p < e) {
int ret = rb_enc_precise_mbclen(p, e, enc);
int len = MBCLEN_CHARFOUND(ret);
- if (len) {
- if (len != 1 || !ISASCII((unsigned char)*p)) {
- cr = ENC_CODERANGE_VALID;
- }
- p += len;
+ if (!len) {
+ return ENC_CODERANGE_BROKEN;
}
- else {
- cr = ENC_CODERANGE_BROKEN;
- break;
- }
+ p += len;
}
- return cr;
+ if (e < p) {
+ return ENC_CODERANGE_BROKEN;
+ }
+ return ENC_CODERANGE_VALID;
}
int
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/