[前][次][番号順一覧][スレッド一覧]

ruby-changes:22985

From: nobu <ko1@a...>
Date: Thu, 15 Mar 2012 15:04:55 +0900 (JST)
Subject: [ruby-changes:22985] nobu:r35034 (trunk): * parse.y (sym_check_asciionly): check ascii compatibility before

nobu	2012-03-15 15:04:44 +0900 (Thu, 15 Mar 2012)

  New Revision: 35034

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35034

  Log:
    * parse.y (sym_check_asciionly): check ascii compatibility before
      scanning for code range.
    * parse.y (intern_str): set to us-ascii if ascii only.
      [ruby-dev:45363][Bug #6146]

  Modified files:
    trunk/ChangeLog
    trunk/parse.y
    trunk/test/ruby/test_symbol.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 35033)
+++ ChangeLog	(revision 35034)
@@ -1,5 +1,11 @@
-Thu Mar 15 15:02:58 2012  Nobuyoshi Nakada  <nobu@r...>
+Thu Mar 15 15:04:41 2012  Nobuyoshi Nakada  <nobu@r...>
 
+	* parse.y (sym_check_asciionly): check ascii compatibility before
+	  scanning for code range.
+
+	* parse.y (intern_str): set to us-ascii if ascii only.
+	  [ruby-dev:45363][Bug #6146]
+
 	* file.c (ruby_enc_find_basename): allow NULL as alllen.
 	  [ruby-dev:45363][Bug #6146]
 
Index: parse.y
===================================================================
--- parse.y	(revision 35033)
+++ parse.y	(revision 35034)
@@ -9955,13 +9955,21 @@
 static int
 sym_check_asciionly(VALUE str)
 {
-    int cr = rb_enc_str_coderange(str);
-    if (cr == ENC_CODERANGE_BROKEN) {
+    if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
+    switch (rb_enc_str_coderange(str)) {
+      case ENC_CODERANGE_BROKEN:
     	rb_raise(rb_eEncodingError, "invalid encoding symbol");
+      case ENC_CODERANGE_7BIT:
+	return TRUE;
     }
-    return cr == ENC_CODERANGE_7BIT;
+    return FALSE;
 }
 
+/*
+ * _str_ itself will be registered at the global symbol table.  _str_
+ * can be modified before the registration, since the encoding will be
+ * set to US-ASCII if it is a special global name.
+ */
 static ID intern_str(VALUE str);
 
 ID
@@ -9979,8 +9987,6 @@
     rb_enc_associate(str, enc);
     OBJ_FREEZE(str);
 
-    if (sym_check_asciionly(str)) enc = rb_usascii_encoding();
-
     if (st_lookup(global_symbols.sym_id, str, &data))
 	return (ID)data;
 
@@ -9993,7 +9999,7 @@
 {
     const char *name, *m, *e;
     long len, last;
-    rb_encoding *enc;
+    rb_encoding *enc, *symenc;
     unsigned char c;
     ID id;
     int mb;
@@ -10002,6 +10008,7 @@
     m = name;
     e = m + len;
     enc = rb_enc_get(str);
+    symenc = enc;
 
     if (rb_cString && !rb_enc_asciicompat(enc)) {
 	id = ID_JUNK;
@@ -10013,7 +10020,7 @@
       case '$':
 	id |= ID_GLOBAL;
 	if ((mb = is_special_global_name(++m, e, enc)) != 0) {
-	    if (!--mb) enc = rb_ascii8bit_encoding();
+	    if (!--mb) symenc = rb_usascii_encoding();
 	    goto new_id;
 	}
 	break;
@@ -10075,7 +10082,9 @@
 	}
     }
     if (m - name < len) id = ID_JUNK;
+    if (sym_check_asciionly(str)) symenc = rb_usascii_encoding();
   new_id:
+    if (symenc != enc) rb_enc_associate(str, symenc);
     if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) {
 	if (len > 20) {
 	    rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.20s...)",
@@ -10107,21 +10116,11 @@
 ID
 rb_intern_str(VALUE str)
 {
-    rb_encoding *enc;
     st_data_t id;
-    int ascii = sym_check_asciionly(str);
 
     if (st_lookup(global_symbols.sym_id, str, &id))
 	return (ID)id;
-    if (ascii && (enc = rb_usascii_encoding()) != rb_enc_get(str)) {
-	str = rb_str_dup(str);
-	rb_enc_associate(str, enc);
-	OBJ_FREEZE(str);
-    }
-    else {
-	str = rb_str_dup_frozen(str);
-    }
-    return intern_str(str);
+    return intern_str(rb_str_dup(str));
 }
 
 VALUE
Index: test/ruby/test_symbol.rb
===================================================================
--- test/ruby/test_symbol.rb	(revision 35033)
+++ test/ruby/test_symbol.rb	(revision 35034)
@@ -161,4 +161,11 @@
       assert_equal(':"\\u3042\\u3044\\u3046"', "\u3042\u3044\u3046".encode(e).to_sym.inspect)
     end
   end
+
+  def test_symbol_encoding
+    assert_equal(Encoding::US_ASCII, "$-A".force_encoding("iso-8859-15").intern.encoding)
+    assert_equal(Encoding::US_ASCII, "foobar~!".force_encoding("iso-8859-15").intern.encoding)
+    assert_equal(Encoding::UTF_8, "\u{2192}".intern.encoding)
+    assert_raise(EncodingError) {"\xb0a".force_encoding("utf-8").intern}
+  end
 end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]