ruby-changes:62565
From: Kasumi <ko1@a...>
Date: Thu, 13 Aug 2020 20:51:15 +0900 (JST)
Subject: [ruby-changes:62565] 5d71eed1a7 (master): rb_str_{partition, rpartition}_m: Handle /\K/ in pattern
https://git.ruby-lang.org/ruby.git/commit/?id=5d71eed1a7 From 5d71eed1a7f0a70db013de59cd7e95bdca0d5c0e Mon Sep 17 00:00:00 2001 From: Kasumi Hanazuki <kasumi@r...> Date: Thu, 13 Aug 2020 03:37:32 +0000 Subject: rb_str_{partition,rpartition}_m: Handle /\K/ in pattern When the pattern given to String#partition and String#rpartition contain a /\K/ (lookbehind) operator, the methods return strings sliced at incorrect positions. ``` # without patch "abcdbce".partition(/b\Kc/) # => ["a", "c", "cdbce"] "abcdbce".rpartition(/b\Kc/) # => ["abcd", "c", "ce"] ``` This patch fixes the problem by using BEG(0) instead of the return value of rb_reg_search. ``` # with patch "abcdbce".partition(/b\Kc/) # => ["ab", "c", "dbce"] "abcdbce".rpartition(/b\Kc/) # => ["abcdb", "c", "e"] ``` As a side-effect this patch makes String#partition 2x faster when the pattern is a costly Regexp by performing Regexp search only once, which was unexpectedly done twice in the original implementation. Fixes [Bug #17119] diff --git a/string.c b/string.c index c2cdd00..e1c6e54 100644 --- a/string.c +++ b/string.c @@ -9940,11 +9940,14 @@ rb_str_partition(VALUE str, VALUE sep) https://github.com/ruby/ruby/blob/trunk/string.c#L9940 sep = get_pat_quoted(sep, 0); if (RB_TYPE_P(sep, T_REGEXP)) { - pos = rb_reg_search(sep, str, 0, 0); - if (pos < 0) { + if (rb_reg_search(sep, str, 0, 0) < 0) { goto failed; } - sep = rb_str_subpat(str, sep, INT2FIX(0)); + VALUE match = rb_backref_get(); + struct re_registers *regs = RMATCH_REGS(match); + + pos = BEG(0); + sep = rb_str_subseq(str, pos, END(0) - pos); } else { pos = rb_str_index(str, sep, 0); @@ -9978,37 +9981,33 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/string.c#L9981 rb_str_rpartition(VALUE str, VALUE sep) { long pos = RSTRING_LEN(str); - int regex = FALSE; + sep = get_pat_quoted(sep, 0); if (RB_TYPE_P(sep, T_REGEXP)) { - pos = rb_reg_search(sep, str, pos, 1); - regex = TRUE; + if (rb_reg_search(sep, str, pos, 1) < 0) { + goto failed; + } + VALUE match = rb_backref_get(); + struct re_registers *regs = RMATCH_REGS(match); + + pos = BEG(0); + sep = rb_str_subseq(str, pos, END(0) - pos); } else { - VALUE tmp; - - tmp = rb_check_string_type(sep); - if (NIL_P(tmp)) { - rb_raise(rb_eTypeError, "type mismatch: %s given", - rb_obj_classname(sep)); - } - sep = tmp; pos = rb_str_sublen(str, pos); pos = rb_str_rindex(str, sep, pos); + if(pos < 0) { + goto failed; + } + pos = rb_str_offset(str, pos); } - if (pos < 0) { - return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str)); - } - if (regex) { - sep = rb_reg_nth_match(0, rb_backref_get()); - } - else { - pos = rb_str_offset(str, pos); - } + return rb_ary_new3(3, rb_str_subseq(str, 0, pos), sep, rb_str_subseq(str, pos+RSTRING_LEN(sep), RSTRING_LEN(str)-pos-RSTRING_LEN(sep))); + failed: + return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str)); } /* diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index c4c7d55..308ba80 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2603,6 +2603,8 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L2603 assert_equal("hello", hello, bug) assert_equal(["", "", "foo"], "foo".partition(/^=*/)) + + assert_equal([S("ab"), S("c"), S("dbce")], S("abcdbce").partition(/b\Kc/)) end def test_rpartition @@ -2627,6 +2629,8 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L2629 hello = "hello" hello.rpartition("hi").map(&:upcase!) assert_equal("hello", hello, bug) + + assert_equal([S("abcdb"), S("c"), S("e")], S("abcdbce").rpartition(/b\Kc/)) end def test_setter -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/