[前][次][番号順一覧][スレッド一覧]

ruby-changes:33372

From: nobu <ko1@a...>
Date: Thu, 27 Mar 2014 18:58:18 +0900 (JST)
Subject: [ruby-changes:33372] nobu:r45451 (trunk): string.c: search by rb_str_index

nobu	2014-03-27 18:58:12 +0900 (Thu, 27 Mar 2014)

  New Revision: 45451

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=45451

  Log:
    string.c: search by rb_str_index
    
    * re.c (match_regexp): set regexp for MatchData from string.
    * re.c (rb_backref_set_string): create MatchData from string and
      set backref.
    * string.c (rb_pat_search, rb_str_sub, rb_str_sub_bang, str_gsub),
      (scan_once, rb_str_scan, rb_str_partition): use rb_str_index
      instead of rb_reg_search() when pattern is a String.  based on
      the patch by Sam Rawlins <sam.rawlins@g...> [Fixes GH-579]

  Modified files:
    trunk/ChangeLog
    trunk/internal.h
    trunk/re.c
    trunk/string.c
    trunk/test/ruby/test_string.rb
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 45450)
+++ ChangeLog	(revision 45451)
@@ -1,3 +1,15 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Thu Mar 27 18:58:10 2014  Nobuyoshi Nakada  <nobu@r...>
+
+	* re.c (match_regexp): set regexp for MatchData from string.
+
+	* re.c (rb_backref_set_string): create MatchData from string and
+	  set backref.
+
+	* string.c (rb_pat_search, rb_str_sub, rb_str_sub_bang, str_gsub),
+	  (scan_once, rb_str_scan, rb_str_partition): use rb_str_index
+	  instead of rb_reg_search() when pattern is a String.  based on
+	  the patch by Sam Rawlins <sam.rawlins@g...> [Fixes GH-579]
+
 Thu Mar 27 11:58:55 2014  NARUSE, Yui  <naruse@r...>
 
 	* addr2line.c (fill_lines): check shdr[i].sh_type because even if
Index: re.c
===================================================================
--- re.c	(revision 45450)
+++ re.c	(revision 45451)
@@ -1017,8 +1017,15 @@ match_init_copy(VALUE obj, VALUE orig) https://github.com/ruby/ruby/blob/trunk/re.c#L1017
 static VALUE
 match_regexp(VALUE match)
 {
+    VALUE regexp;
     match_check(match);
-    return RMATCH(match)->regexp;
+    regexp = RMATCH(match)->regexp;
+    if (NIL_P(regexp)) { /* nil regexp: match_set_string() stores Qnil for string-based matches */
+	VALUE str = rb_reg_nth_match(0, match); /* group 0, i.e. the whole matched text */
+	regexp = rb_reg_regcomp(rb_reg_quote(str)); /* compile a Regexp from the quoted matched text */
+	RMATCH(match)->regexp = regexp; /* store it so later calls take the non-nil path */
+    }
+    return regexp;
 }
 
 /*
@@ -1216,6 +1223,31 @@ rb_match_busy(VALUE match) https://github.com/ruby/ruby/blob/trunk/re.c#L1223
     FL_SET(match, MATCH_BUSY);
 }
 
+static void
+match_set_string(VALUE m, VALUE string, long pos, long len)
+{
+    struct RMatch *match = (struct RMatch *)m;
+    struct rmatch *rmatch = match->rmatch;
+    /* Turn m into a match with a single region spanning [pos, pos+len) of string. */
+    match->str = string;
+    match->regexp = Qnil; /* no Regexp stored; match_regexp() builds one when it finds nil */
+    if (onig_region_resize(&rmatch->regs, 1)) rb_memerror(); /* nonzero means the resize failed: do not write beg/end */
+    rmatch->regs.beg[0] = pos;
+    rmatch->regs.end[0] = pos + len;
+    rmatch->char_offset_updated = 0; /* presumably invalidates cached character offsets -- confirm */
+}
+
+void
+rb_backref_set_string(VALUE string, long pos, long len) /* make the backref a match of string at [pos, pos+len) */
+{
+    VALUE match = rb_backref_get();
+    if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) { /* no current backref, or it is flagged MATCH_BUSY: allocate instead of reusing */
+	match = match_alloc(rb_cMatch);
+    }
+    match_set_string(match, string, pos, len); /* overwrites str, regexp and region 0 of match */
+    rb_backref_set(match);
+}
+
 /*
  *  call-seq:
  *     rxp.fixed_encoding?   -> true or false
@@ -1909,6 +1941,10 @@ match_inspect(VALUE match) https://github.com/ruby/ruby/blob/trunk/re.c#L1941
     if (regexp == 0) {
         return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
     }
+    else if (NIL_P(regexp)) {
+        return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
+			  cname, rb_reg_nth_match(0, match));
+    }
 
     names = ALLOCA_N(struct backref_name_tag, num_regs);
     MEMZERO(names, struct backref_name_tag, num_regs);
Index: string.c
===================================================================
--- string.c	(revision 45450)
+++ string.c	(revision 45451)
@@ -2906,7 +2906,7 @@ rb_str_match(VALUE x, VALUE y) https://github.com/ruby/ruby/blob/trunk/string.c#L2906
 }
 
 
-static VALUE get_pat(VALUE, int);
+static VALUE get_pat(VALUE);
 
 
 /*
@@ -2946,7 +2946,7 @@ rb_str_match_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L2946
 	rb_check_arity(argc, 1, 2);
     re = argv[0];
     argv[0] = str;
-    result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
+    result = rb_funcall2(get_pat(re), rb_intern("match"), argc, argv);
     if (!NIL_P(result) && rb_block_given_p()) {
 	return rb_yield(result);
     }
@@ -3837,11 +3837,12 @@ rb_str_slice_bang(int argc, VALUE *argv, https://github.com/ruby/ruby/blob/trunk/string.c#L3837
 }
 
 static VALUE
-get_pat(VALUE pat, int quote)
+get_pat(VALUE pat)
 {
     VALUE val;
 
-    switch (TYPE(pat)) {
+    if (SPECIAL_CONST_P(pat)) goto to_string;
+    switch (BUILTIN_TYPE(pat)) {
       case T_REGEXP:
 	return pat;
 
@@ -3849,6 +3850,7 @@ get_pat(VALUE pat, int quote) https://github.com/ruby/ruby/blob/trunk/string.c#L3850
 	break;
 
       default:
+      to_string:
 	val = rb_check_string_type(pat);
 	if (NIL_P(val)) {
 	    Check_Type(pat, T_REGEXP);
@@ -3856,11 +3858,50 @@ get_pat(VALUE pat, int quote) https://github.com/ruby/ruby/blob/trunk/string.c#L3858
 	pat = val;
     }
 
-    if (quote) {
-	pat = rb_reg_quote(pat);
+    return rb_reg_regcomp(pat);
+}
+
+static VALUE
+get_pat_quoted(VALUE pat, int check) /* normalize pat to a Regexp or a String; a String is returned as is, not compiled */
+{
+    VALUE val;
+
+    if (SPECIAL_CONST_P(pat)) goto to_string; /* special constants bypass the BUILTIN_TYPE switch */
+    switch (BUILTIN_TYPE(pat)) {
+      case T_REGEXP:
+	return pat;
+
+      case T_STRING:
+	break;
+
+      default:
+      to_string:
+	val = rb_check_string_type(pat); /* nil result means pat could not be used as a String */
+	if (NIL_P(val)) {
+	    Check_Type(pat, T_REGEXP); /* pat is neither Regexp nor String-like: let Check_Type report it */
+	}
+	pat = val;
+    }
+    if (check && is_broken_string(pat)) { /* only when check is nonzero: reject a broken string pattern */
+	rb_raise(rb_eTypeError, "%"PRIsVALUE, rb_reg_new_str(pat, 0));
     }
+    return pat;
+}
 
-    return rb_reg_regcomp(pat);
+static long
+rb_pat_search(VALUE pat, VALUE str, long pos, int set_backref_str) /* search str for pat (String or Regexp) starting at pos */
+{
+    if (BUILTIN_TYPE(pat) == T_STRING) {
+	pos = rb_str_index(str, pat, pos); /* NOTE(review): confirm rb_str_index uses the same offset unit as rb_reg_search0 */
+	if (pos >= 0 && set_backref_str) {
+	    str = rb_str_new_frozen(str); /* the MatchData is given a frozen string rather than str itself */
+	    rb_backref_set_string(str, pos, RSTRING_LEN(pat));
+	}
+	return pos;
+    }
+    else {
+	return rb_reg_search0(pat, str, pos, 0, set_backref_str);
+    }
 }
 
 
@@ -3883,6 +3924,7 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/string.c#L3924
     int tainted = 0;
     long plen;
     int min_arity = rb_block_given_p() ? 1 : 2;
+    long beg;
 
     rb_check_arity(argc, min_arity, 2);
     if (argc == 1) {
@@ -3897,23 +3939,38 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/string.c#L3939
 	if (OBJ_TAINTED(repl)) tainted = 1;
     }
 
-    pat = get_pat(argv[0], 1);
+    pat = get_pat_quoted(argv[0], 1);
+
     str_modifiable(str);
-    if (rb_reg_search(pat, str, 0, 0) >= 0) {
+    beg = rb_pat_search(pat, str, 0, 1);
+    if (beg >= 0) {
 	rb_encoding *enc;
 	int cr = ENC_CODERANGE(str);
-	VALUE match = rb_backref_get();
-	struct re_registers *regs = RMATCH_REGS(match);
-	long beg0 = BEG(0);
-	long end0 = END(0);
+	long beg0, end0;
+	VALUE match, match0;
+	struct re_registers *regs;
 	char *p, *rp;
 	long len, rlen;
 
+	if (RB_TYPE_P(pat, T_STRING)) {
+	    beg0 = beg;
+	    end0 = beg0 + RSTRING_LEN(pat);
+	    match0 = pat;
+	}
+	else {
+	    match = rb_backref_get();
+	    regs = RMATCH_REGS(match);
+	    beg0 = BEG(0);
+	    end0 = END(0);
+	    if (!iter && NIL_P(hash)) repl = rb_reg_regsub(repl, str, regs, pat);
+	    if (iter) match0 = rb_reg_nth_match(0, match);
+	}
+
 	if (iter || !NIL_P(hash)) {
 	    p = RSTRING_PTR(str); len = RSTRING_LEN(str);
 
             if (iter) {
-                repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+                repl = rb_obj_as_string(rb_yield(match0));
             }
             else {
                 repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
@@ -3922,9 +3979,7 @@ rb_str_sub_bang(int argc, VALUE *argv, V https://github.com/ruby/ruby/blob/trunk/string.c#L3979
 	    str_mod_check(str, p, len);
 	    rb_check_frozen(str);
 	}
-	else {
-	    repl = rb_reg_regsub(repl, str, regs, pat);
-	}
+
         enc = rb_enc_compatible(str, repl);
         if (!enc) {
             rb_encoding *str_enc = STR_ENC_GET(str);
@@ -4021,7 +4076,7 @@ rb_str_sub(int argc, VALUE *argv, VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L4076
 static VALUE
 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
 {
-    VALUE pat, val, repl, match, dest, hash = Qnil;
+    VALUE pat, val, repl, match, match0, dest, hash = Qnil;
     struct re_registers *regs;
     long beg, n;
     long beg0, end0;
@@ -4049,9 +4104,9 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4104
 	rb_check_arity(argc, 1, 2);
     }
 
-    pat = get_pat(argv[0], 1);
+    pat = get_pat_quoted(argv[0], 1);
     need_backref = iter || !NIL_P(hash);
-    beg = rb_reg_search0(pat, str, 0, 0, need_backref);
+    beg = rb_pat_search(pat, str, 0, need_backref);
     if (beg < 0) {
 	if (bang) return Qnil;	/* no match, no substitution */
 	return rb_str_dup(str);
@@ -4070,16 +4125,28 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4125
 
     do {
 	n++;
-	match = rb_backref_get();
-	regs = RMATCH_REGS(match);
-	beg0 = BEG(0);
-	end0 = END(0);
+
+	if (RB_TYPE_P(pat, T_STRING)) {
+	    beg0 = beg;
+	    end0 = beg0 + RSTRING_LEN(pat);
+	    if (!need_backref) val = repl;
+	    match0 = pat;
+	}
+	else {
+	    match = rb_backref_get();
+	    regs = RMATCH_REGS(match);
+	    beg0 = BEG(0);
+	    end0 = END(0);
+	    if (!need_backref) val = rb_reg_regsub(repl, str, regs, pat);
+	    if (iter) match0 = rb_reg_nth_match(0, match);
+	}
+
 	if (need_backref) {
             if (iter) {
-                val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+                val = rb_obj_as_string(rb_yield(match0));
             }
             else {
-                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
+                val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
                 val = rb_obj_as_string(val);
             }
 	    str_mod_check(str, sp, slen);
@@ -4087,9 +4154,6 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4154
 		rb_raise(rb_eRuntimeError, "block should not cheat");
 	    }
 	}
-	else {
-	    val = rb_reg_regsub(repl, str, regs, pat);
-	}
 
 	if (OBJ_TAINTED(val)) tainted = 1;
 
@@ -4114,12 +4178,12 @@ str_gsub(int argc, VALUE *argv, VALUE st https://github.com/ruby/ruby/blob/trunk/string.c#L4178
 	}
 	cp = RSTRING_PTR(str) + offset;
 	if (offset > RSTRING_LEN(str)) break;
-	beg = rb_reg_search0(pat, str, offset, 0, need_backref);
+	beg = rb_pat_search(pat, str, offset, need_backref);
     } while (beg >= 0);
     if (RSTRING_LEN(str) > offset) {
         rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
     }
-    rb_reg_search(pat, str, last, 0);
+    rb_pat_search(pat, str, last, 1);
     if (bang) {
         rb_str_shared_replace(str, dest);
     }
@@ -6118,7 +6182,8 @@ rb_str_split_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L6182
     }
     else {
       fs_set:
-	if (RB_TYPE_P(spat, T_STRING)) {
+	spat = get_pat_quoted(spat, 1);
+	if (BUILTIN_TYPE(spat) == T_STRING) {
 	    rb_encoding *enc2 = STR_ENC_GET(spat);
 
 	    split_type = string;
@@ -6141,7 +6206,6 @@ rb_str_split_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/string.c#L6206
 	    }
 	}
 	else {
-	    spat = get_pat(spat, 1);
 	    split_type = regexp;
 	}
     }
@@ -7143,7 +7207,7 @@ scan_once(VALUE str, VALUE pat, long *st https://github.com/ruby/ruby/blob/trunk/string.c#L7207
     struct re_registers *regs;
     int i;
 
-    if (rb_reg_search(pat, str, *start, 0) >= 0) {
+    if (rb_pat_search(pat, str, *start, 1) >= 0) {
 	match = rb_backref_get();
 	regs = RMATCH_REGS(match);
 	if (BEG(0) == END(0)) {
@@ -7213,7 +7277,8 @@ rb_str_scan(VALUE str, VALUE pat) https://github.com/ruby/ruby/blob/trunk/string.c#L7277
     long last = -1, prev = 0;
     char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
 
-    pat = get_pat(pat, 1);
+    pat = get_pat_quoted(pat, 1);
+    mustnot_broken(str);
     if (!rb_block_given_p()) {
 	VALUE ary = rb_ary_new();
 
@@ -7222,7 +7287,7 @@ rb_str_scan(VALUE str, VALUE pat) https://github.com/ruby/ruby/blob/trunk/string.c#L7287
 	    prev = start;
 	    rb_ary_push(ary, result);
 	}
-	if (last >= 0) rb_reg_search(pat, str, last, 0);
+	if (last >= 0) rb_pat_search(pat, str, last, 1);
 	return ary;
     }
 
@@ -7232,7 +7297,7 @@ rb_str_scan(VALUE str, VALUE pat) https://github.com/ruby/ruby/blob/trunk/string.c#L7297
 	rb_yield(result);
 	str_mod_check(str, p, len);
     }
-    if (last >= 0) rb_reg_search(pat, str, last, 0);
+    if (last >= 0) rb_pat_search(pat, str, last, 1);
     return str;
 }
 
@@ -7619,31 +7684,21 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L7684
 rb_str_partition(VALUE str, VALUE sep)
 {
     long pos;
-    int regex = FALSE;
 
+    sep = get_pat_quoted(sep, 0);
     if (RB_TYPE_P(sep, T_REGEXP)) {
 	pos = rb_reg_search(sep, str, 0, 0);
-	regex = TRUE;
-    }
-    else {
-	VALUE tmp;
-
-	tmp = rb_check_string_type(sep);
-	if (NIL_P(tmp)) {
-	    rb_raise(rb_eTypeError, "type mismatch: %s given",
-		     rb_obj_classname(sep));
+	if (pos < 0) {
+	  failed:
+	    return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
 	}
-	sep = tmp;
-	pos = rb_str_index(str, sep, 0);
-    }
-    if (pos < 0) {
-      failed:
-	return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
-    }
-    if (regex) {
 	sep = rb_str_subpat(str, sep, INT2FIX(0));
 	if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
     }
+    else {
+	pos = rb_str_index(str, sep, 0);
+	if (pos < 0) goto failed;
+    }
     return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
 		          sep,
 		          rb_str_subseq(str, pos+RSTRING_LEN(sep),
Index: internal.h
===================================================================
--- internal.h	(revision 45450)
+++ internal.h	(revision 45451)
@@ -821,6 +821,7 @@ VALUE rb_rational_reciprocal(VALUE x); https://github.com/ruby/ruby/blob/trunk/internal.h#L821
 VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline);
 VALUE rb_reg_check_preprocess(VALUE);
 long rb_reg_search0(VALUE, VALUE, long, int, int);
+void rb_backref_set_string(VALUE string, long pos, long len);
 
 /* signal.c */
 int rb_get_next_signal(void);
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb	(revision 45450)
+++ test/ruby/test_string.rb	(revision 45451)
@@ -831,6 +831,8 @@ class TestString < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L831
     c.force_encoding Encoding::US_ASCII
 
     assert_equal Encoding::UTF_8, a.gsub(/world/, c).encoding
+
+    assert_equal S("a\u{e9}apos&lt;"), S("a\u{e9}'&lt;").gsub("'", "apos")
   end
 
   def test_gsub!
@@ -1454,6 +1456,12 @@ class TestString < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L1456
     o = Object.new
     def o.to_s; self; end
     assert_match(/^foo#<Object:0x.*>baz$/, "foobarbaz".sub("bar") { o })
+
+    assert_equal(S("Abc"), S("abc").sub("a", "A"))
+    m = nil
+    assert_equal(S("Abc"), S("abc").sub("a") {m = $~; "A"})
+    assert_equal(S("a"), m[0])
+    assert_equal(/a/, m.regexp)
   end
 
   def test_sub!

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]