[前][次][番号順一覧][スレッド一覧]

ruby-changes:47380

From: nobu <ko1@a...>
Date: Fri, 4 Aug 2017 13:39:59 +0900 (JST)
Subject: [ruby-changes:47380] nobu:r59496 (trunk): string.c: improve String#scan

nobu	2017-08-04 13:39:53 +0900 (Fri, 04 Aug 2017)

  New Revision: 59496

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59496

  Log:
    string.c: improve String#scan
    
    * string.c (scan_once, rb_str_scan): improve performance by 50%
      for a string pattern, and by 10% for a regexp pattern.  avoid
      creating an intermediate MatchData, which is not used.

  Modified files:
    trunk/string.c
Index: string.c
===================================================================
--- string.c	(revision 59495)
+++ string.c	(revision 59496)
@@ -8564,35 +8564,49 @@ rb_str_strip(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L8564
 }
 
 static VALUE
-scan_once(VALUE str, VALUE pat, long *start)
+scan_once(VALUE str, VALUE pat, long *start, int set_backref_str)
 {
     VALUE result, match;
     struct re_registers *regs;
     int i;
-
-    if (rb_pat_search(pat, str, *start, 1) >= 0) {
-	match = rb_backref_get();
-	regs = RMATCH_REGS(match);
-	if (BEG(0) == END(0)) {
+    long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
+    if (pos >= 0) {
+	if (BUILTIN_TYPE(pat) == T_STRING) {
+	    regs = NULL;
+	    end = pos + RSTRING_LEN(pat);
+	}
+	else {
+	    match = rb_backref_get();
+	    regs = RMATCH_REGS(match);
+	    end = END(0);
+	}
+	if (pos == end) {
 	    rb_encoding *enc = STR_ENC_GET(str);
 	    /*
 	     * Always consume at least one character of the input string
 	     */
-	    if (RSTRING_LEN(str) > END(0))
-		*start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
-						   RSTRING_END(str), enc);
+	    if (RSTRING_LEN(str) > end)
+		*start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
+						  RSTRING_END(str), enc);
 	    else
-		*start = END(0)+1;
+		*start = end + 1;
 	}
 	else {
-	    *start = END(0);
+	    *start = end;
 	}
-	if (regs->num_regs == 1) {
-	    return rb_reg_nth_match(0, match);
+	if (!regs || regs->num_regs == 1) {
+	    result = rb_str_subseq(str, pos, end - pos);
+	    OBJ_INFECT(result, pat);
+	    return result;
 	}
 	result = rb_ary_new2(regs->num_regs);
 	for (i=1; i < regs->num_regs; i++) {
-	    rb_ary_push(result, rb_reg_nth_match(i, match));
+	    VALUE s = Qnil;
+	    if (BEG(i) >= 0) {
+		s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
+		OBJ_INFECT(s, pat);
+	    }
+	    rb_ary_push(result, s);
 	}
 
 	return result;
@@ -8645,16 +8659,17 @@ rb_str_scan(VALUE str, VALUE pat) https://github.com/ruby/ruby/blob/trunk/string.c#L8659
     if (!rb_block_given_p()) {
 	VALUE ary = rb_ary_new();
 
-	while (!NIL_P(result = scan_once(str, pat, &start))) {
+	while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
 	    last = prev;
 	    prev = start;
 	    rb_ary_push(ary, result);
 	}
 	if (last >= 0) rb_pat_search(pat, str, last, 1);
+	else rb_backref_set(Qnil);
 	return ary;
     }
 
-    while (!NIL_P(result = scan_once(str, pat, &start))) {
+    while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
 	last = prev;
 	prev = start;
 	rb_yield(result);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]