ruby-changes:2038
From: ko1@a...
Date: 27 Sep 2007 04:47:24 +0900
Subject: [ruby-changes:2038] nobu - Ruby:r13529 (trunk): * encoding.c (rb_enc_associate_index): deal with ASCII compatible
nobu 2007-09-27 04:46:58 +0900 (Thu, 27 Sep 2007)
New Revision: 13529
Modified files:
trunk/ChangeLog
trunk/encoding.c
trunk/include/ruby/encoding.h
trunk/parse.y
trunk/string.c
Log:
* encoding.c (rb_enc_associate_index): deal with ASCII compatible
flags.
* encoding.c (rb_enc_check): allow ASCII compatible strings.
* parse.y (rb_intern_str): use ASCII encoding for ASCII string.
* string.c (rb_enc_str_coderange): check for code-range.
* string.c (rb_str_modify): clear code-range flags.
* string.c (rb_str_hash, rb_str_eql): ASCII compatible strings are
comparable.
* include/ruby/encoding.h: added code-range flags.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13529&r2=13528
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13529&r2=13528
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13529&r2=13528
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=13529&r2=13528
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=13529&r2=13528
Index: encoding.c
===================================================================
--- encoding.c (revision 13528)
+++ encoding.c (revision 13529)
@@ -122,6 +122,10 @@
rb_enc_associate_index(VALUE obj, int idx)
{
enc_check_capable(obj);
+ if (!ENC_CODERANGE_ASCIIONLY(obj) ||
+ !rb_enc_asciicompat(rb_enc_from_index(idx))) {
+ ENC_CODERANGE_CLEAR(obj);
+ }
if (idx < ENCODING_INLINE_MAX) {
ENCODING_SET(obj, idx);
return;
@@ -204,6 +208,14 @@
return enc;
}
}
+ if (BUILTIN_TYPE(str1) == T_STRING &&
+ BUILTIN_TYPE(str2) == T_STRING &&
+ rb_enc_asciicompat(rb_enc_from_index(idx1)) &&
+ rb_enc_asciicompat(rb_enc_from_index(idx2)) &&
+ rb_enc_str_coderange(str1) == ENC_CODERANGE_SINGLE &&
+ rb_enc_str_coderange(str2) == ENC_CODERANGE_SINGLE) {
+ return ONIG_ENCODING_ASCII;
+ }
rb_raise(rb_eArgError, "character encodings differ");
}
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 13528)
+++ include/ruby/encoding.h (revision 13529)
@@ -24,6 +24,17 @@
} while (0)
#define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+#define ENC_CODERANGE_MASK (FL_USER12|FL_USER13)
+#define ENC_CODERANGE_UNKNOWN 0
+#define ENC_CODERANGE_SINGLE FL_USER12
+#define ENC_CODERANGE_MULTI FL_USER13
+#define ENC_CODERANGE_BROKEN (FL_USER12|FL_USER13)
+#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
+#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_SINGLE)
+/* Replace the code-range bits of obj's flags with cr: clear both range bits first, then OR in cr. */
+#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
+#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
+
+
typedef OnigEncodingType rb_encoding;
int rb_enc_to_index(rb_encoding*);
@@ -80,5 +91,6 @@
int rb_enc_tolower(int c, rb_encoding *enc);
ID rb_intern3(const char*, long, rb_encoding*);
int rb_enc_symname_p(const char*, rb_encoding*);
+int rb_enc_str_coderange(VALUE);
#endif /* RUBY_ENCODING_H */
Index: ChangeLog
===================================================================
--- ChangeLog (revision 13528)
+++ ChangeLog (revision 13529)
@@ -1,3 +1,21 @@
+Thu Sep 27 04:46:31 2007 Nobuyoshi Nakada <nobu@r...>
+
+ * encoding.c (rb_enc_associate_index): deal with ASCII compatible
+ flags.
+
+ * encoding.c (rb_enc_check): allow ASCII compatible strings.
+
+ * parse.y (rb_intern_str): use ASCII encoding for ASCII string.
+
+ * string.c (rb_enc_str_coderange): check for code-range.
+
+ * string.c (rb_str_modify): clear code-range flags.
+
+ * string.c (rb_str_hash, rb_str_eql): ASCII compatible strings are
+ comparable.
+
+ * include/ruby/encoding.h: added code-range flags.
+
Thu Sep 27 04:40:47 2007 Nobuyoshi Nakada <nobu@r...>
* gc.c (rb_mark_set): new function to mark keys.
Index: string.c
===================================================================
--- string.c (revision 13528)
+++ string.c (revision 13529)
@@ -92,8 +92,37 @@
}\
} while (0)
+#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_SINGLE)
+
VALUE rb_fs;
+/*
+ * Returns the code range of str, one of the ENC_CODERANGE_* values.
+ *
+ * A range already recorded in the object's flags is returned as is.
+ * Otherwise the bytes are scanned: the result stays ENC_CODERANGE_SINGLE
+ * while only ASCII bytes are seen, becomes ENC_CODERANGE_MULTI when a
+ * non-ASCII byte decodes to a code point, and ENC_CODERANGE_BROKEN when
+ * rb_enc_codepoint() reports -1.  The result is stored back into the
+ * flags with ENC_CODERANGE_SET().
+ */
+int
+rb_enc_str_coderange(VALUE str)
+{
+    long i;
+    int cr = ENC_CODERANGE(str);
+
+    if (cr == ENC_CODERANGE_UNKNOWN) {
+        cr = ENC_CODERANGE_SINGLE;
+        for (i = 0; i < RSTRING_LEN(str); ++i) {
+            const char *p = &RSTRING_PTR(str)[i];
+            int c = (unsigned char)*p;
+
+            if (!ISASCII(c)) {
+                c = rb_enc_codepoint(p, RSTRING_END(str), rb_enc_get(str));
+                if (c == -1) {
+                    /* An invalid sequence is final: stop here so that a
+                     * later valid character cannot turn BROKEN into MULTI. */
+                    cr = ENC_CODERANGE_BROKEN;
+                    break;
+                }
+                cr = ENC_CODERANGE_MULTI;
+            }
+        }
+        ENC_CODERANGE_SET(str, cr);
+    }
+    return cr;
+}
+
static inline void
str_mod_check(VALUE s, char *p, long len)
{
@@ -553,8 +582,8 @@
return rb_str_format(1, &arg, str);
}
-static int
-str_independent(VALUE str)
+static void
+str_modifiable(VALUE str)
{
if (FL_TEST(str, STR_TMPLOCK)) {
rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
@@ -562,6 +591,12 @@
if (OBJ_FROZEN(str)) rb_error_frozen("string");
if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
rb_raise(rb_eSecurityError, "Insecure: can't modify string");
+}
+
+static int
+str_independent(VALUE str)
+{
+ str_modifiable(str);
if (!STR_SHARED_P(str)) return 1;
if (STR_EMBED_P(str)) return 1;
return 0;
@@ -589,6 +624,7 @@
{
if (!str_independent(str))
str_make_independent(str);
+ ENC_CODERANGE_CLEAR(str);
}
void
@@ -1129,8 +1165,12 @@
int
rb_str_hash(VALUE str)
{
+ int e = rb_enc_get_index(str);
+ if (e && is_ascii_string(str)) {
+ e = 0;
+ }
return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str),
- rb_enc_get_index(str));
+ e);
}
/*
@@ -1149,18 +1189,6 @@
#define lesser(a,b) (((a)>(b))?(b):(a))
-static int
-is_ascii_string(VALUE str)
-{
- long i;
-
- for (i = 0; i < RSTRING_LEN(str); ++i) {
- int c = (unsigned char)RSTRING_PTR(str)[i];
- if (!ISASCII(c)) return Qfalse;
- }
- return Qtrue;
-}
-
int
rb_str_comparable(VALUE str1, VALUE str2)
{
@@ -1234,8 +1262,7 @@
if (TYPE(str2) != T_STRING || RSTRING_LEN(str1) != RSTRING_LEN(str2))
return Qfalse;
- if (rb_enc_get_index(str1) != rb_enc_get_index(str2))
- return Qfalse;
+ if (!rb_str_comparable(str1, str2)) return Qfalse;
if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2),
lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0)
@@ -3529,7 +3556,7 @@
static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
- rb_encoding *enc;
+ rb_encoding *enc = 0;
char *s, *send, *t;
VALUE del = 0, nodel = 0;
int modify = 0;
@@ -3736,7 +3763,7 @@
static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
- rb_encoding *enc;
+ rb_encoding *enc = 0;
VALUE del = 0, nodel = 0;
char *s, *send;
int i;
@@ -5065,12 +5092,35 @@
}
+/*
+ * call-seq:
+ * str.encoding => str
+ *
+ * Returns the encoding name.
+ */
+
static VALUE
str_encoding(VALUE str)
{
return rb_str_new2(rb_enc_name(rb_enc_get(str)));
}
+
+/*
+ * call-seq:
+ * str.associate_encoding(encoding) => str
+ *
+ * Changes the encoding to +encoding+ and returns self.
+ */
+
+static VALUE
+rb_str_associate_encoding(VALUE str, VALUE encname)
+{
+    rb_encoding *enc;
+
+    /* Check that the receiver may be modified before looking up the encoding. */
+    str_modifiable(str);
+    /* Look the encoding up by the name held in encname. */
+    enc = rb_enc_find(StringValueCStr(encname));
+    rb_enc_associate(str, enc);
+    return str;
+}
+
/**********************************************************************
* Document-class: Symbol
*
@@ -5482,6 +5532,7 @@
rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
rb_define_method(rb_cString, "encoding", str_encoding, 0);
+ rb_define_method(rb_cString, "associate_encoding", rb_str_associate_encoding, 1);
id_to_s = rb_intern("to_s");
Index: parse.y
===================================================================
--- parse.y (revision 13528)
+++ parse.y (revision 13529)
@@ -8548,7 +8548,14 @@
ID
rb_intern_str(VALUE str)
{
- ID id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
+ int idx = 0;
+ ID id;
+
+ if (rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
+ idx = rb_enc_get_index(str);
+ }
+ id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str),
+ rb_enc_from_index(idx));
RB_GC_GUARD(str);
return id;
}
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml