ruby-changes:16214
From: naruse <ko1@a...>
Date: Sat, 5 Jun 2010 23:16:19 +0900 (JST)
Subject: [ruby-changes:16214] Ruby:r28180 (ruby_1_9_2): merge revision(s) 28174:28178:
naruse 2010-06-05 23:16:05 +0900 (Sat, 05 Jun 2010) New Revision: 28180 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=28180 Log: merge revision(s) 28174:28178: * re.c (rb_reg_expr_str): ASCII incompatible strings must always escape or converted. * re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char when resenc is given: for Regexp#inspect or error message. * re.c (rb_reg_desc): add 'n' for ENCODING_NONE. * string.c (sym_inspect): Escape when the symbol is not resulted encoding and not ascii_only. It had escaped ascii-incompatible string, but it is wrong. * string.c (rb_str_buf_cat_escaped_char): defined. Splited from rb_str_inspect. Modified files: branches/ruby_1_9_2/ChangeLog branches/ruby_1_9_2/re.c branches/ruby_1_9_2/string.c branches/ruby_1_9_2/test/ruby/envutil.rb branches/ruby_1_9_2/test/ruby/test_regexp.rb Index: ruby_1_9_2/ChangeLog =================================================================== --- ruby_1_9_2/ChangeLog (revision 28179) +++ ruby_1_9_2/ChangeLog (revision 28180) @@ -1,3 +1,24 @@ +Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@r...> + + * re.c (rb_reg_expr_str): ASCII incompatible strings + must always escape or converted. + + * re.c (rb_reg_expr_str): use rb_str_buf_cat_escaped_char + when resenc is given: for Regexp#inspect or error message. + + * re.c (rb_reg_desc): add 'n' for ENCODING_NONE. + +Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@r...> + + * string.c (sym_inspect): Escape when the symbol is not + resulted encoding and not ascii_only. It had escaped + ascii-incompatible string, but it is wrong. + +Sat Jun 5 23:15:42 2010 NARUSE, Yui <naruse@r...> + + * string.c (rb_str_buf_cat_escaped_char): defined. + Splited from rb_str_inspect. + Sat Jun 5 23:14:51 2010 NARUSE, Yui <naruse@r...> * string.c (rb_str_inspect): inspect as ASCII when the codepoint Index: ruby_1_9_2/re.c =================================================================== --- ruby_1_9_2/re.c (revision 28179) +++ ruby_1_9_2/re.c (revision 28180) @@ -314,32 +314,47 @@ } } +int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p); + static void -rb_reg_expr_str(VALUE str, const char *s, long len) +rb_reg_expr_str(VALUE str, const char *s, long len, + rb_encoding *enc, rb_encoding *resenc) { - rb_encoding *enc = rb_enc_get(str); const char *p, *pend; int need_escape = 0; int c, clen; p = s; pend = p + len; - while (p<pend) { - c = rb_enc_ascget(p, pend, &clen, enc); - if (c == -1) { - p += mbclen(p, pend, enc); - } - else if (c != '/' && rb_enc_isprint(c, enc)) { - p += clen; - } - else { - need_escape = 1; - break; - } + if (rb_enc_asciicompat(enc)) { + while (p < pend) { + c = rb_enc_ascget(p, pend, &clen, enc); + if (c == -1) { + if (enc == resenc) { + p += mbclen(p, pend, enc); + } + else { + need_escape = 1; + break; + } + } + else if (c != '/' && rb_enc_isprint(c, enc)) { + p += clen; + } + else { + need_escape = 1; + break; + } + } } + else { + need_escape = 1; + } + if (!need_escape) { rb_str_buf_cat(str, s, len); } else { + int unicode_p = rb_enc_unicode_p(enc); p = s; while (p<pend) { c = rb_enc_ascget(p, pend, &clen, enc); @@ -355,8 +370,15 @@ rb_str_buf_cat(str, p, clen); } else if (c == -1) { - int l = mbclen(p, pend, enc); - rb_str_buf_cat(str, p, l); + int l; + if (resenc) { + unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc); + l = rb_str_buf_cat_escaped_char(str, c, unicode_p); + } + else { + l = mbclen(p, pend, enc); + rb_str_buf_cat(str, p, l); + } p += l; continue; } @@ -380,20 +402,26 @@ static VALUE rb_reg_desc(const char *s, long len, VALUE re) { + rb_encoding *enc = rb_enc_get(re); VALUE str = rb_str_buf_new2("/"); - if (re && rb_enc_asciicompat(rb_enc_get(re))) { + rb_encoding *resenc = rb_default_internal_encoding(); + if (resenc == NULL) resenc = rb_default_external_encoding(); + + if (re && rb_enc_asciicompat(enc)) { rb_enc_copy(str, re); } else { rb_enc_associate(str, rb_usascii_encoding()); } - rb_reg_expr_str(str, s, len); + rb_reg_expr_str(str, s, len, enc, resenc); rb_str_buf_cat2(str, "/"); if (re) { char opts[4]; rb_reg_check(re); if (*option_to_str(opts, RREGEXP(re)->ptr->options)) rb_str_buf_cat2(str, opts); + if (RBASIC(re)->flags & REG_ENCODING_NONE) + rb_str_buf_cat2(str, "n"); } OBJ_INFECT(str, re); return str; @@ -476,6 +504,7 @@ const UChar* ptr; VALUE str = rb_str_buf_new2("(?"); char optbuf[5]; + rb_encoding *enc = rb_enc_get(re); rb_reg_check(re); @@ -524,7 +553,7 @@ ++ptr; len -= 2; err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT, - rb_enc_get(re), OnigDefaultSyntax, NULL); + enc, OnigDefaultSyntax, NULL); onig_free(rp); } if (err) { @@ -543,7 +572,7 @@ } rb_str_buf_cat2(str, ":"); - rb_reg_expr_str(str, (char*)ptr, len); + rb_reg_expr_str(str, (char*)ptr, len, enc, NULL); rb_str_buf_cat2(str, ")"); rb_enc_copy(str, re); @@ -564,10 +593,12 @@ { char opts[6]; VALUE desc = rb_str_buf_new2(err); + rb_encoding *resenc = rb_default_internal_encoding(); + if (resenc == NULL) resenc = rb_default_external_encoding(); rb_enc_associate(desc, enc); rb_str_buf_cat2(desc, ": /"); - rb_reg_expr_str(desc, s, len); + rb_reg_expr_str(desc, s, len, enc, resenc); opts[0] = '/'; option_to_str(opts + 1, options); rb_str_buf_cat2(desc, opts); Index: ruby_1_9_2/string.c =================================================================== --- ruby_1_9_2/string.c (revision 28179) +++ ruby_1_9_2/string.c (revision 28180) @@ -4078,6 +4078,36 @@ } #endif +#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ + +int +rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p) { + char buf[CHAR_ESC_LEN + 1]; + int l; + if (unicode_p) { + if (c < 0x7F && ISPRINT(c)) { + snprintf(buf, CHAR_ESC_LEN, "%c", c); + } + else if (c < 0x10000) { + snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c); + } + else { + snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c); + } + } + else { + if (c < 0x100) { + snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c); + } + else { + snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c); + } + } + l = strlen(buf); + rb_str_buf_cat(result, buf, l); + return l; +} + /* * call-seq: * str.inspect -> string @@ -4095,7 +4125,6 @@ { rb_encoding *enc = STR_ENC_GET(str); const char *p, *pend, *prev; -#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ char buf[CHAR_ESC_LEN + 1]; VALUE result = rb_str_buf_new(0); rb_encoding *resenc = rb_default_internal_encoding(); @@ -4165,27 +4194,7 @@ } else { if (p - n > prev) str_buf_cat(result, prev, p - n - prev); - if (unicode_p) { - if (c < 0x100 && ISPRINT(c)) { - snprintf(buf, CHAR_ESC_LEN, "%c", c); - } - else if (c < 0x10000) { - snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c); - } - else { - snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c); - } - str_buf_cat(result, buf, strlen(buf)); - } - else { - if (c < 0x100) { - snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c); - } - else { - snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c); - } - str_buf_cat(result, buf, strlen(buf)); - } + rb_str_buf_cat_escaped_char(result, c, unicode_p); prev = p; continue; } @@ -7069,12 +7078,14 @@ const char *ptr; long len; char *dest; + rb_encoding *resenc = rb_default_internal_encoding(); + if (resenc == NULL) resenc = rb_default_external_encoding(); sym = rb_id2str(id); enc = STR_ENC_GET(sym); ptr = RSTRING_PTR(sym); len = RSTRING_LEN(sym); - if (!rb_enc_asciicompat(enc) || len != (long)strlen(ptr) || + if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) || !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) { str = rb_str_inspect(sym); len = RSTRING_LEN(str); Index: ruby_1_9_2/test/ruby/test_regexp.rb =================================================================== --- ruby_1_9_2/test/ruby/test_regexp.rb (revision 28179) +++ ruby_1_9_2/test/ruby/test_regexp.rb (revision 28180) @@ -151,7 +151,7 @@ assert_equal('/\x00/i', /#{"\0"}/i.inspect) assert_equal("/\n/i", /#{"\n"}/i.inspect) s = [0xff].pack("C") - assert_equal('/\/'+s+'/i', /\/#{s}/i.inspect) + assert_equal('/\/\xFF/i', /\/#{s}/i.inspect) end def test_char_to_option Index: ruby_1_9_2/test/ruby/envutil.rb =================================================================== --- ruby_1_9_2/test/ruby/envutil.rb (revision 28179) +++ ruby_1_9_2/test/ruby/envutil.rb (revision 28180) @@ -64,6 +64,7 @@ module_function :rubyexec def invoke_ruby(args, stdin_data="", capture_stdout=false, capture_stderr=false, opt={}) + args = [args] if args.kind_of?(String) begin in_c, in_p = IO.pipe out_p, out_c = IO.pipe if capture_stdout -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/