ruby-changes:47380
From: nobu <ko1@a...>
Date: Fri, 4 Aug 2017 13:39:59 +0900 (JST)
Subject: [ruby-changes:47380] nobu:r59496 (trunk): string.c: improve String#scan
nobu 2017-08-04 13:39:53 +0900 (Fri, 04 Aug 2017) New Revision: 59496 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59496 Log: string.c: improve String#scan * string.c (rb_str_rstrip_bang): improve the performance in 50% for a string pattern, and in 10% for a regexp pattern. get rid of making MatchData in middle, which is not used. Modified files: trunk/string.c Index: string.c =================================================================== --- string.c (revision 59495) +++ string.c (revision 59496) @@ -8564,35 +8564,49 @@ rb_str_strip(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L8564 } static VALUE -scan_once(VALUE str, VALUE pat, long *start) +scan_once(VALUE str, VALUE pat, long *start, int set_backref_str) { VALUE result, match; struct re_registers *regs; int i; - - if (rb_pat_search(pat, str, *start, 1) >= 0) { - match = rb_backref_get(); - regs = RMATCH_REGS(match); - if (BEG(0) == END(0)) { + long end, pos = rb_pat_search(pat, str, *start, set_backref_str); + if (pos >= 0) { + if (BUILTIN_TYPE(pat) == T_STRING) { + regs = NULL; + end = pos + RSTRING_LEN(pat); + } + else { + match = rb_backref_get(); + regs = RMATCH_REGS(match); + end = END(0); + } + if (pos == end) { rb_encoding *enc = STR_ENC_GET(str); /* * Always consume at least one character of the input string */ - if (RSTRING_LEN(str) > END(0)) - *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0), - RSTRING_END(str), enc); + if (RSTRING_LEN(str) > end) + *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end, + RSTRING_END(str), enc); else - *start = END(0)+1; + *start = end + 1; } else { - *start = END(0); + *start = end; } - if (regs->num_regs == 1) { - return rb_reg_nth_match(0, match); + if (!regs || regs->num_regs == 1) { + result = rb_str_subseq(str, pos, end - pos); + OBJ_INFECT(result, pat); + return result; } result = rb_ary_new2(regs->num_regs); for (i=1; i < regs->num_regs; i++) { - rb_ary_push(result, rb_reg_nth_match(i, match)); + VALUE s = Qnil; + if (BEG(i) >= 0) { + s = rb_str_subseq(str, BEG(i), END(i)-BEG(i)); + OBJ_INFECT(s, pat); + } + rb_ary_push(result, s); } return result; @@ -8645,16 +8659,17 @@ rb_str_scan(VALUE str, VALUE pat) https://github.com/ruby/ruby/blob/trunk/string.c#L8659 if (!rb_block_given_p()) { VALUE ary = rb_ary_new(); - while (!NIL_P(result = scan_once(str, pat, &start))) { + while (!NIL_P(result = scan_once(str, pat, &start, 0))) { last = prev; prev = start; rb_ary_push(ary, result); } if (last >= 0) rb_pat_search(pat, str, last, 1); + else rb_backref_set(Qnil); return ary; } - while (!NIL_P(result = scan_once(str, pat, &start))) { + while (!NIL_P(result = scan_once(str, pat, &start, 1))) { last = prev; prev = start; rb_yield(result); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/