[前][次][番号順一覧][スレッド一覧]

ruby-changes:16211

From: naruse <ko1@a...>
Date: Sat, 5 Jun 2010 20:32:20 +0900 (JST)
Subject: [ruby-changes:16211] Ruby:r28177 (trunk): * re.c (rb_reg_expr_str): ASCII incompatible strings

naruse	2010-06-05 20:32:05 +0900 (Sat, 05 Jun 2010)

  New Revision: 28177

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=28177

  Log:
    * re.c (rb_reg_expr_str): ASCII incompatible strings
      must always be escaped or converted.
    
    * re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char
      when resenc is given: for Regexp#inspect or error message.
    
    * re.c (rb_reg_desc): add 'n' for ENCODING_NONE.

  Modified files:
    trunk/ChangeLog
    trunk/re.c
    trunk/test/ruby/test_regexp.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 28176)
+++ ChangeLog	(revision 28177)
@@ -1,3 +1,13 @@
+Sat Jun  5 20:30:49 2010  NARUSE, Yui  <naruse@r...>
+
+	* re.c (rb_reg_expr_str): ASCII incompatible strings
+	  must always escape or converted.
+
+	* re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char
+	  when resenc is given: for Regexp#inspect or error message.
+
+	  * re.c (rb_reg_desc): add 'n' for ENCODING_NONE.
+
 Sat Jun  5 01:20:14 2010  NARUSE, Yui  <naruse@r...>
 
 	* string.c (sym_inspect): Escape when the symbol is not
Index: re.c
===================================================================
--- re.c	(revision 28176)
+++ re.c	(revision 28177)
@@ -314,32 +314,47 @@
     }
 }
 
+int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p);
+
 static void
-rb_reg_expr_str(VALUE str, const char *s, long len)
+rb_reg_expr_str(VALUE str, const char *s, long len,
+	rb_encoding *enc, rb_encoding *resenc)
 {
-    rb_encoding *enc = rb_enc_get(str);
     const char *p, *pend;
     int need_escape = 0;
     int c, clen;
 
     p = s; pend = p + len;
-    while (p<pend) {
-        c = rb_enc_ascget(p, pend, &clen, enc);
-        if (c == -1) {
-            p += mbclen(p, pend, enc);
-        }
-        else if (c != '/' && rb_enc_isprint(c, enc)) {
-            p += clen;
-        }
-        else {
-	    need_escape = 1;
-	    break;
-        }
+    if (rb_enc_asciicompat(enc)) {
+	while (p < pend) {
+	    c = rb_enc_ascget(p, pend, &clen, enc);
+	    if (c == -1) {
+		if (enc == resenc) {
+		    p += mbclen(p, pend, enc);
+		}
+		else {
+		    need_escape = 1;
+		    break;
+		}
+	    }
+	    else if (c != '/' && rb_enc_isprint(c, enc)) {
+		p += clen;
+	    }
+	    else {
+		need_escape = 1;
+		break;
+	    }
+	}
     }
+    else {
+	need_escape = 1;
+    }
+
     if (!need_escape) {
 	rb_str_buf_cat(str, s, len);
     }
     else {
+	int unicode_p = rb_enc_unicode_p(enc);
 	p = s;
 	while (p<pend) {
             c = rb_enc_ascget(p, pend, &clen, enc);
@@ -355,8 +370,15 @@
 		rb_str_buf_cat(str, p, clen);
 	    }
 	    else if (c == -1) {
-                int l = mbclen(p, pend, enc);
-		rb_str_buf_cat(str, p, l);
+                int l;
+		if (resenc) {
+		    unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
+		    l = rb_str_buf_cat_escaped_char(str, c, unicode_p);
+		}
+		else {
+		    l = mbclen(p, pend, enc);
+		    rb_str_buf_cat(str, p, l);
+		}
 		p += l;
 		continue;
 	    }
@@ -380,20 +402,26 @@
 static VALUE
 rb_reg_desc(const char *s, long len, VALUE re)
 {
+    rb_encoding *enc = rb_enc_get(re);
     VALUE str = rb_str_buf_new2("/");
-    if (re && rb_enc_asciicompat(rb_enc_get(re))) {
+    rb_encoding *resenc = rb_default_internal_encoding();
+    if (resenc == NULL) resenc = rb_default_external_encoding();
+
+    if (re && rb_enc_asciicompat(enc)) {
 	rb_enc_copy(str, re);
     }
     else {
 	rb_enc_associate(str, rb_usascii_encoding());
     }
-    rb_reg_expr_str(str, s, len);
+    rb_reg_expr_str(str, s, len, enc, resenc);
     rb_str_buf_cat2(str, "/");
     if (re) {
 	char opts[4];
 	rb_reg_check(re);
 	if (*option_to_str(opts, RREGEXP(re)->ptr->options))
 	    rb_str_buf_cat2(str, opts);
+	if (RBASIC(re)->flags & REG_ENCODING_NONE)
+	    rb_str_buf_cat2(str, "n");
     }
     OBJ_INFECT(str, re);
     return str;
@@ -476,6 +504,7 @@
     const UChar* ptr;
     VALUE str = rb_str_buf_new2("(?");
     char optbuf[5];
+    rb_encoding *enc = rb_enc_get(re);
 
     rb_reg_check(re);
 
@@ -524,7 +553,7 @@
 	    ++ptr;
 	    len -= 2;
             err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
-			   rb_enc_get(re), OnigDefaultSyntax, NULL);
+			   enc, OnigDefaultSyntax, NULL);
 	    onig_free(rp);
 	}
 	if (err) {
@@ -543,7 +572,7 @@
     }
 
     rb_str_buf_cat2(str, ":");
-    rb_reg_expr_str(str, (char*)ptr, len);
+    rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
     rb_str_buf_cat2(str, ")");
     rb_enc_copy(str, re);
 
@@ -564,10 +593,12 @@
 {
     char opts[6];
     VALUE desc = rb_str_buf_new2(err);
+    rb_encoding *resenc = rb_default_internal_encoding();
+    if (resenc == NULL) resenc = rb_default_external_encoding();
 
     rb_enc_associate(desc, enc);
     rb_str_buf_cat2(desc, ": /");
-    rb_reg_expr_str(desc, s, len);
+    rb_reg_expr_str(desc, s, len, enc, resenc);
     opts[0] = '/';
     option_to_str(opts + 1, options);
     rb_str_buf_cat2(desc, opts);
Index: test/ruby/test_regexp.rb
===================================================================
--- test/ruby/test_regexp.rb	(revision 28176)
+++ test/ruby/test_regexp.rb	(revision 28177)
@@ -151,7 +151,7 @@
     assert_equal('/\x00/i', /#{"\0"}/i.inspect)
     assert_equal("/\n/i", /#{"\n"}/i.inspect)
     s = [0xff].pack("C")
-    assert_equal('/\/'+s+'/i', /\/#{s}/i.inspect)
+    assert_equal('/\/\xFF/i', /\/#{s}/i.inspect)
   end
 
   def test_char_to_option

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]