[前][次][番号順一覧][スレッド一覧]

ruby-changes:2038

From: ko1@a...
Date: 27 Sep 2007 04:47:24 +0900
Subject: [ruby-changes:2038] nobu - Ruby:r13529 (trunk): * encoding.c (rb_enc_associate_index): deal with ASCII compatible

nobu	2007-09-27 04:46:58 +0900 (Thu, 27 Sep 2007)

  New Revision: 13529

  Modified files:
    trunk/ChangeLog
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/parse.y
    trunk/string.c

  Log:
    * encoding.c (rb_enc_associate_index): deal with ASCII compatible
      flags.
    
    * encoding.c (rb_enc_check): allow ASCII compatible strings.
    
    * parse.y (rb_intern_str): use ASCII encoding for ASCII string.
    
    * string.c (rb_enc_str_coderange): check for code-range.
    
    * string.c (rb_str_modify): clear code-range flags.
    
    * string.c (rb_str_hash, rb_str_eql): ASCII compatible strings are
      comparable.
    
    * include/ruby/encoding.h: added code-range flags.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=13529&r2=13528
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=13529&r2=13528
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=13529&r2=13528
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=13529&r2=13528
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=13529&r2=13528

Index: encoding.c
===================================================================
--- encoding.c	(revision 13528)
+++ encoding.c	(revision 13529)
@@ -122,6 +122,10 @@
 rb_enc_associate_index(VALUE obj, int idx)
 {
     enc_check_capable(obj);
+    if (!ENC_CODERANGE_ASCIIONLY(obj) ||
+	!rb_enc_asciicompat(rb_enc_from_index(idx))) {
+	ENC_CODERANGE_CLEAR(obj);
+    }
     if (idx < ENCODING_INLINE_MAX) {
 	ENCODING_SET(obj, idx);
 	return;
@@ -204,6 +208,14 @@
 	    return enc;
 	}
     }
+    if (BUILTIN_TYPE(str1) == T_STRING &&
+	BUILTIN_TYPE(str2) == T_STRING &&
+	rb_enc_asciicompat(rb_enc_from_index(idx1)) &&
+	rb_enc_asciicompat(rb_enc_from_index(idx2)) &&
+	rb_enc_str_coderange(str1) == ENC_CODERANGE_SINGLE &&
+	rb_enc_str_coderange(str2) == ENC_CODERANGE_SINGLE) {
+	return ONIG_ENCODING_ASCII;
+    }
     rb_raise(rb_eArgError, "character encodings differ");
 }
 
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 13528)
+++ include/ruby/encoding.h	(revision 13529)
@@ -24,6 +24,17 @@
 } while (0)
 #define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
 
+#define ENC_CODERANGE_MASK	(FL_USER12|FL_USER13)
+#define ENC_CODERANGE_UNKNOWN	0
+#define ENC_CODERANGE_SINGLE	FL_USER12
+#define ENC_CODERANGE_MULTI	FL_USER13
+#define ENC_CODERANGE_BROKEN	(FL_USER12|FL_USER13)
+#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
+#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_SINGLE)
+#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags &= ~ENC_CODERANGE_MASK | (cr))
+#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
+
+
 typedef OnigEncodingType rb_encoding;
 
 int rb_enc_to_index(rb_encoding*);
@@ -80,5 +91,6 @@
 int rb_enc_tolower(int c, rb_encoding *enc);
 ID rb_intern3(const char*, long, rb_encoding*);
 int rb_enc_symname_p(const char*, rb_encoding*);
+int rb_enc_str_coderange(VALUE);
 
 #endif /* RUBY_ENCODING_H */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 13528)
+++ ChangeLog	(revision 13529)
@@ -1,3 +1,21 @@
+Thu Sep 27 04:46:31 2007  Nobuyoshi Nakada  <nobu@r...>
+
+	* encoding.c (rb_enc_associate_index): deal with ASCII compatible
+	  flags.
+
+	* encoding.c (rb_enc_check): allow ASCII compatible strings.
+
+	* parse.y (rb_intern_str): use ASCII encoding for ASCII string.
+
+	* string.c (rb_enc_str_coderange): check for code-range.
+
+	* string.c (rb_str_modify): clear code-range flags.
+
+	* string.c (rb_str_hash, rb_str_eql): ASCII compatible strings are
+	  comparable.
+
+	* include/ruby/encoding.h: added code-range flags.
+
 Thu Sep 27 04:40:47 2007  Nobuyoshi Nakada  <nobu@r...>
 
 	* gc.c (rb_mark_set): new function to mark keys.
Index: string.c
===================================================================
--- string.c	(revision 13528)
+++ string.c	(revision 13529)
@@ -92,8 +92,37 @@
     }\
 } while (0)
 
+#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_SINGLE)
+
 VALUE rb_fs;
 
+int
+rb_enc_str_coderange(VALUE str)
+{
+    long i;
+    int cr = ENC_CODERANGE(str);
+
+    if (cr == ENC_CODERANGE_UNKNOWN) {
+	cr = ENC_CODERANGE_SINGLE;
+	for (i = 0; i < RSTRING_LEN(str); ++i) {
+	    const char *p = &RSTRING_PTR(str)[i];
+	    int c = (unsigned char)*p;
+
+	    if (!ISASCII(c)) {
+		c = rb_enc_codepoint(p, RSTRING_END(str), rb_enc_get(str));
+		if (c == -1) {
+		    cr = ENC_CODERANGE_BROKEN;
+		}
+		else {
+		    cr = ENC_CODERANGE_MULTI;
+		}
+	    }
+	}
+	ENC_CODERANGE_SET(str, cr);
+    }
+    return cr;
+}
+
 static inline void
 str_mod_check(VALUE s, char *p, long len)
 {
@@ -553,8 +582,8 @@
     return rb_str_format(1, &arg, str);
 }
 
-static int
-str_independent(VALUE str)
+static void
+str_modifiable(VALUE str)
 {
     if (FL_TEST(str, STR_TMPLOCK)) {
 	rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
@@ -562,6 +591,12 @@
     if (OBJ_FROZEN(str)) rb_error_frozen("string");
     if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
+}
+
+static int
+str_independent(VALUE str)
+{
+    str_modifiable(str);
     if (!STR_SHARED_P(str)) return 1;
     if (STR_EMBED_P(str)) return 1;
     return 0;
@@ -589,6 +624,7 @@
 {
     if (!str_independent(str))
 	str_make_independent(str);
+    ENC_CODERANGE_CLEAR(str);
 }
 
 void
@@ -1129,8 +1165,12 @@
 int
 rb_str_hash(VALUE str)
 {
+    int e = rb_enc_get_index(str);
+    if (e && is_ascii_string(str)) {
+	e = 0;
+    }
     return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str),
-		rb_enc_get_index(str));
+		e);
 }
 
 /*
@@ -1149,18 +1189,6 @@
 
 #define lesser(a,b) (((a)>(b))?(b):(a))
 
-static int
-is_ascii_string(VALUE str)
-{
-    long i;
-
-    for (i = 0; i < RSTRING_LEN(str); ++i) {
-	int c = (unsigned char)RSTRING_PTR(str)[i];
-	if (!ISASCII(c)) return Qfalse;
-    }
-    return Qtrue;
-}
-
 int
 rb_str_comparable(VALUE str1, VALUE str2)
 {
@@ -1234,8 +1262,7 @@
     if (TYPE(str2) != T_STRING || RSTRING_LEN(str1) != RSTRING_LEN(str2))
 	return Qfalse;
 
-    if (rb_enc_get_index(str1) != rb_enc_get_index(str2))
-	return Qfalse;
+    if (!rb_str_comparable(str1, str2)) return Qfalse;
 
     if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2),
 	       lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0)
@@ -3529,7 +3556,7 @@
 static VALUE
 rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
 {
-    rb_encoding *enc;
+    rb_encoding *enc = 0;
     char *s, *send, *t;
     VALUE del = 0, nodel = 0;
     int modify = 0;
@@ -3736,7 +3763,7 @@
 static VALUE
 rb_str_count(int argc, VALUE *argv, VALUE str)
 {
-    rb_encoding *enc;
+    rb_encoding *enc = 0;
     VALUE del = 0, nodel = 0;
     char *s, *send;
     int i;
@@ -5065,12 +5092,35 @@
 }
 
 
+/*
+ *  call-seq:
+ *     str.encoding   => str
+ *
+ *  Returns the encoding name.
+ */
+
 static VALUE
 str_encoding(VALUE str)
 {
     return rb_str_new2(rb_enc_name(rb_enc_get(str)));
 }
 
+
+/*
+ *  call-seq:
+ *     str.associate_encoding(encoding)   => str
+ *
+ *  Changes the encoding to +encoding+ and returns self.
+ */
+
+static VALUE
+rb_str_associate_encoding(VALUE str, VALUE encname)
+{
+    str_modifiable(str);
+    rb_enc_associate(str, rb_enc_find(StringValueCStr(encname)));
+    return str;
+}
+
 /**********************************************************************
  * Document-class: Symbol
  *
@@ -5482,6 +5532,7 @@
     rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
 
     rb_define_method(rb_cString, "encoding", str_encoding, 0);
+    rb_define_method(rb_cString, "associate_encoding", rb_str_associate_encoding, 1);
 
     id_to_s = rb_intern("to_s");
 
Index: parse.y
===================================================================
--- parse.y	(revision 13528)
+++ parse.y	(revision 13529)
@@ -8548,7 +8548,14 @@
 ID
 rb_intern_str(VALUE str)
 {
-    ID id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
+    int idx = 0;
+    ID id;
+
+    if (rb_enc_str_coderange(str) != ENC_CODERANGE_SINGLE) {
+	idx = rb_enc_get_index(str);
+    }
+    id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str),
+		    rb_enc_from_index(idx));
     RB_GC_GUARD(str);
     return id;
 }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]