ruby-changes:2056
From: ko1@a...
Date: 28 Sep 2007 18:07:21 +0900
Subject: [ruby-changes:2056] matz - Ruby:r13547 (trunk): * string.c (rb_str_comparable): need not to check asciicompat here.
matz 2007-09-28 18:07:02 +0900 (Fri, 28 Sep 2007) New Revision: 13547 Modified files: trunk/ChangeLog trunk/encoding.c trunk/string.c Log: * string.c (rb_str_comparable): need not to check asciicompat here. * encoding.c (rb_enc_check): ditto. * string.c (rb_enc_str_coderange): tuned a bit; no broken check. * encoding.c (rb_enc_check): new encoding comparison criteria. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13547&r2=13546 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13547&r2=13546 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=13547&r2=13546 Index: encoding.c =================================================================== --- encoding.c (revision 13546) +++ encoding.c (revision 13547) @@ -196,26 +196,28 @@ return rb_enc_from_index(idx1); } - if (idx1 == 0) { - enc = rb_enc_from_index(idx2); - if (rb_enc_asciicompat(enc)) { - return enc; + if (BUILTIN_TYPE(str1) != T_STRING) { + VALUE tmp = str1; + str1 = str2; + str2 = tmp; + } + if (BUILTIN_TYPE(str1) == T_STRING) { + int cr1, cr2; + + cr1 = rb_enc_str_coderange(str1); + if (BUILTIN_TYPE(str2) == T_STRING) { + cr2 = rb_enc_str_coderange(str2); + if (cr1 != cr2) { + /* may need to handle ENC_CODERANGE_BROKEN */ + if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx2); + if (cr2 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx1); + } + if (cr1 == ENC_CODERANGE_SINGLE) return ONIG_ENCODING_ASCII; } - } - else if (idx2 == 0) { - enc = rb_enc_from_index(idx1); - if (rb_enc_asciicompat(enc)) { + if (cr1 == ENC_CODERANGE_SINGLE && + rb_enc_asciicompat(enc = rb_enc_from_index(idx2))) return enc; - } } - if (BUILTIN_TYPE(str1) == T_STRING && - BUILTIN_TYPE(str2) == T_STRING && - rb_enc_asciicompat(rb_enc_from_index(idx1)) && - rb_enc_asciicompat(rb_enc_from_index(idx2)) && - rb_enc_str_coderange(str1) == ENC_CODERANGE_SINGLE && - rb_enc_str_coderange(str2) == ENC_CODERANGE_SINGLE) { - return ONIG_ENCODING_ASCII; - } rb_raise(rb_eArgError, "character encodings differ"); } Index: ChangeLog =================================================================== --- ChangeLog (revision 13546) +++ ChangeLog (revision 13547) @@ -65,6 +65,16 @@ * benchmark/driver.rb: fix file selection algorithm. +Fri Sep 28 02:05:42 2007 Yukihiro Matsumoto <matz@r...> + + * string.c (rb_str_comparable): need not to check asciicompat here. + + * encoding.c (rb_enc_check): ditto. + + * string.c (rb_enc_str_coderange): tuned a bit; no broken check. + + * encoding.c (rb_enc_check): new encoding comparison criteria. + Thu Sep 27 17:36:28 2007 NAKAMURA Usaku <usa@r...> * win32/REAMDE.win32: follow recent changes. Index: string.c =================================================================== --- string.c (revision 13546) +++ string.c (revision 13547) @@ -99,26 +99,32 @@ int rb_enc_str_coderange(VALUE str) { - long i; int cr = ENC_CODERANGE(str); if (cr == ENC_CODERANGE_UNKNOWN) { - cr = ENC_CODERANGE_SINGLE; - for (i = 0; i < RSTRING_LEN(str); ++i) { - const char *p = &RSTRING_PTR(str)[i]; - int c = (unsigned char)*p; + rb_encoding *enc = rb_enc_get(str); - if (!ISASCII(c)) { - c = rb_enc_codepoint(p, RSTRING_END(str), rb_enc_get(str)); - if (c == -1) { - cr = ENC_CODERANGE_BROKEN; - } - else { + if (!rb_enc_asciicompat(enc)) { + cr = ENC_CODERANGE_MULTI; + ENC_CODERANGE_SET(str, cr); + return cr; + } + else { + const char *p = RSTRING_PTR(str); + const char *e = p + RSTRING_LEN(str); + + cr = ENC_CODERANGE_SINGLE; + while (p < e) { + int c = (unsigned char)*p; + + if (c > 0x80) { cr = ENC_CODERANGE_MULTI; + break; } + p++; } + ENC_CODERANGE_SET(str, cr); } - ENC_CODERANGE_SET(str, cr); } return cr; } @@ -1169,8 +1175,7 @@ if (e && is_ascii_string(str)) { e = 0; } - return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str), - e); + return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str), e); } /* @@ -1196,8 +1201,6 @@ int idx2 = rb_enc_get_index(str2); if (idx1 == idx2) return Qtrue; - if (!rb_enc_asciicompat(rb_enc_from_index(idx1))) return Qfalse; - if (!rb_enc_asciicompat(rb_enc_from_index(idx2))) return Qfalse; if (!is_ascii_string(str1)) return Qfalse; if (!is_ascii_string(str2)) return Qfalse; return Qtrue; @@ -1263,7 +1266,6 @@ return Qfalse; if (!rb_str_comparable(str1, str2)) return Qfalse; - if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0) return Qtrue; -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml