ruby-changes:33380
From: nobu <ko1@a...>
Date: Fri, 28 Mar 2014 11:28:22 +0900 (JST)
Subject: [ruby-changes:33380] nobu:r45459 (trunk): string.c: backref substitution
nobu 2014-03-28 11:28:16 +0900 (Fri, 28 Mar 2014)

  New Revision: 45459

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=45459

  Log:
    string.c: backref substitution

    * re.c (rb_reg_regsub): allow nil regexp for string matching.

    * string.c (rb_str_sub_bang, str_gsub): make substitution if
      replacement string has backref escapes.

  Modified files:
    trunk/re.c
    trunk/string.c
    trunk/test/ruby/test_string.rb

Index: re.c
===================================================================
--- re.c (revision 45458)
+++ re.c (revision 45459)
@@ -3402,7 +3402,7 @@ rb_reg_regsub(VALUE str, VALUE src, stru https://github.com/ruby/ruby/blob/trunk/re.c#L3402
         switch (c) {
           case '1': case '2': case '3': case '4':
           case '5': case '6': case '7': case '8': case '9':
-            if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
+            if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
                 no = c - '0';
             }
             else {
Index: string.c
===================================================================
--- string.c (revision 45458)
+++ string.c (revision 45459)
@@ -3957,17 +3957,16 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/string.c#L3957
         char *p, *rp;
         long len, rlen;
 
+        match = rb_backref_get();
+        regs = RMATCH_REGS(match);
         if (RB_TYPE_P(pat, T_STRING)) {
             beg0 = beg;
             end0 = beg0 + RSTRING_LEN(pat);
             match0 = pat;
         }
         else {
-            match = rb_backref_get();
-            regs = RMATCH_REGS(match);
             beg0 = BEG(0);
             end0 = END(0);
-            if (!iter && NIL_P(hash)) repl = rb_reg_regsub(repl, str, regs, pat);
             if (iter) match0 = rb_reg_nth_match(0, match);
         }
 
@@ -3984,6 +3983,9 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/string.c#L3983
             str_mod_check(str, p, len);
             rb_check_frozen(str);
         }
+        else {
+            repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
+        }
 
         enc = rb_enc_compatible(str, repl);
         if (!enc) {
@@ -4086,16 +4088,16 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4088
     long beg, n;
     long beg0, end0;
     long offset, blen, slen, len, last;
-    int iter = 0;
+    enum {STR, ITER, MAP} mode = STR;
     char *sp, *cp;
     int tainted = 0;
-    int need_backref;
+    int need_backref = -1;
     rb_encoding *str_enc;
 
     switch (argc) {
       case 1:
         RETURN_ENUMERATOR(str, argc, argv);
-        iter = 1;
+        mode = ITER;
         break;
       case 2:
         repl = argv[1];
@@ -4103,6 +4105,9 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4105
         if (NIL_P(hash)) {
             StringValue(repl);
         }
+        else {
+            mode = MAP;
+        }
         if (OBJ_TAINTED(repl)) tainted = 1;
         break;
       default:
@@ -4110,7 +4115,6 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4115
     }
 
     pat = get_pat_quoted(argv[0], 1);
-    need_backref = iter || !NIL_P(hash);
     beg = rb_pat_search(pat, str, 0, need_backref);
     if (beg < 0) {
         if (bang) return Qnil; /* no match, no substitution */
@@ -4131,23 +4135,21 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4135
 
     do {
         n++;
+        match = rb_backref_get();
+        regs = RMATCH_REGS(match);
         if (RB_TYPE_P(pat, T_STRING)) {
             beg0 = beg;
             end0 = beg0 + RSTRING_LEN(pat);
-            if (!need_backref) val = repl;
             match0 = pat;
         }
         else {
-            match = rb_backref_get();
-            regs = RMATCH_REGS(match);
             beg0 = BEG(0);
             end0 = END(0);
-            if (!need_backref) val = rb_reg_regsub(repl, str, regs, pat);
-            if (iter) match0 = rb_reg_nth_match(0, match);
+            if (mode == ITER) match0 = rb_reg_nth_match(0, match);
         }
 
-        if (need_backref) {
-            if (iter) {
+        if (mode) {
+            if (mode == ITER) {
                 val = rb_obj_as_string(rb_yield(match0));
             }
             else {
@@ -4159,6 +4161,16 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4161
                 rb_raise(rb_eRuntimeError, "block should not cheat");
             }
         }
+        else if (need_backref) {
+            val = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
+            if (need_backref < 0) {
+                need_backref = val != repl;
+            }
+        }
+        else {
+            val = repl;
+        }
+
         if (OBJ_TAINTED(val)) tainted = 1;
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb (revision 45458)
+++ test/ruby/test_string.rb (revision 45459)
@@ -808,6 +808,7 @@ class TestString < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L808
                  S("hello").gsub(/./) { |s| s[0].to_s + S(' ')})
     assert_equal(S("HELL-o"),
                  S("hello").gsub(/(hell)(.)/) { |s| $1.upcase + S('-') + $2 })
+    assert_equal(S("<>h<>e<>l<>l<>o<>"), S("hello").gsub(S(''), S('<\0>')))
 
     a = S("hello")
     a.taint
@@ -1415,6 +1416,7 @@ class TestString < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L1416
     assert_equal(S("HELL-o"), S("hello").sub(/(hell)(.)/) {
                    |s| $1.upcase + S('-') + $2
                    })
+    assert_equal(S("h<e>llo"), S("hello").sub('e', S('<\0>')))
 
     assert_equal(S("a\\aba"), S("ababa").sub(/b/, '\\'))
     assert_equal(S("ab\\aba"), S("ababa").sub(/(b)/, '\1\\'))

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/