ruby-changes:68201
From: Jeremy <ko1@a...>
Date: Sat, 2 Oct 2021 13:50:30 +0900 (JST)
Subject: [ruby-changes:68201] abc0304cb2 (master): Avoid race condition in Regexp#match
https://git.ruby-lang.org/ruby.git/commit/?id=abc0304cb2 From abc0304cb28cb9dcc3476993bc487884c139fd11 Mon Sep 17 00:00:00 2001 From: Jeremy Evans <code@j...> Date: Wed, 11 Aug 2021 13:50:59 -0700 Subject: Avoid race condition in Regexp#match In certain conditions, Regexp#match could return a MatchData with missing captures. This seems to require at the least, multiple threads calling a method that calls the same block/proc/lambda which calls Regexp#match. The race condition happens because the MatchData is passed from indirectly via the backref, and other threads can modify the backref. Fix the issue by: 1. Not reusing the existing MatchData from the backref, and always allocating a new MatchData. 2. Passing the MatchData directly to the caller using a VALUE*, instead of indirectly through the backref. It's likely that variants of this issue exist for other Regexp methods. Anywhere that MatchData is passed implicitly through the backref is probably vulnerable to this issue. Fixes [Bug #17507] --- re.c | 46 +++++++++++++++++++--------------------------- test/ruby/test_regexp.rb | 21 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/re.c b/re.c index 8e2df1f82c..58e43f9c29 100644 --- a/re.c +++ b/re.c @@ -1615,8 +1615,8 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) https://github.com/ruby/ruby/blob/trunk/re.c#L1615 } /* returns byte offset */ -long -rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) +static long +rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match) { long result; VALUE match; @@ -1638,18 +1638,7 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1638 tmpreg = reg != RREGEXP_PTR(re); if (!tmpreg) RREGEXP(re)->usecnt++; - match = rb_backref_get(); - if (!NIL_P(match)) { - if (FL_TEST(match, MATCH_BUSY)) { - match = Qnil; - } - else { - regs = RMATCH_REGS(match); - } - } - if (NIL_P(match)) { - MEMZERO(regs, struct re_registers, 1); - } + MEMZERO(regs, struct re_registers, 1); if (!reverse) { range += len; } @@ -1682,13 +1671,10 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1671 } } - if (NIL_P(match)) { - int err; - match = match_alloc(rb_cMatch); - err = rb_reg_region_copy(RMATCH_REGS(match), regs); - onig_region_free(regs, 0); - if (err) rb_memerror(); - } + match = match_alloc(rb_cMatch); + int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs); + onig_region_free(regs, 0); + if (copy_err) rb_memerror(); if (set_backref_str) { RMATCH(match)->str = rb_str_new4(str); @@ -1696,10 +1682,17 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1682 RMATCH(match)->regexp = re; rb_backref_set(match); + if (set_match) *set_match = match; return result; } +long +rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) +{ + return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL); +} + long rb_reg_search(VALUE re, VALUE str, long pos, int reverse) { @@ -3193,7 +3186,7 @@ reg_operand(VALUE s, int check) https://github.com/ruby/ruby/blob/trunk/re.c#L3186 } static long -reg_match_pos(VALUE re, VALUE *strp, long pos) +reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match) { VALUE str = *strp; @@ -3212,7 +3205,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) https://github.com/ruby/ruby/blob/trunk/re.c#L3205 } pos = rb_str_offset(str, pos); } - return rb_reg_search(re, str, pos, 0); + return rb_reg_search_set_match(re, str, pos, 0, 1, set_match); } /* @@ -3266,7 +3259,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) https://github.com/ruby/ruby/blob/trunk/re.c#L3259 VALUE rb_reg_match(VALUE re, VALUE str) { - long pos = reg_match_pos(re, &str, 0); + long pos = reg_match_pos(re, &str, 0, NULL); if (pos < 0) return Qnil; pos = rb_str_sublen(str, pos); return LONG2FIX(pos); @@ -3377,7 +3370,7 @@ rb_reg_match2(VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L3370 static VALUE rb_reg_match_m(int argc, VALUE *argv, VALUE re) { - VALUE result, str, initpos; + VALUE result = Qnil, str, initpos; long pos; if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) { @@ -3387,12 +3380,11 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L3380 pos = 0; } - pos = reg_match_pos(re, &str, pos); + pos = reg_match_pos(re, &str, pos, &result); if (pos < 0) { rb_backref_set(Qnil); return Qnil; } - result = rb_backref_get(); rb_match_busy(result); if (!NIL_P(result) && rb_block_given_p()) { return rb_yield(result); diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 7c76f5c150..2bf4649f14 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -265,6 +265,27 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L265 assert_equal(re, re.match("foo").regexp) end + def test_match_lambda_multithread + bug17507 = "[ruby-core:101901]" + str = "a-x-foo-bar-baz-z-b" + + worker = lambda do + m = /foo-([A-Za-z0-9_\.]+)-baz/.match(str) + assert_equal("bar", m[1], bug17507) + + # These two lines are needed to trigger the bug + File.exist? "/tmp" + str.gsub(/foo-bar-baz/, "foo-abc-baz") + end + + def self. threaded_test(worker) + 6.times.map {Thread.new {10_000.times {worker.call}}}.each(&:join) + end + + # The bug only occurs in a method calling a block/proc/lambda + threaded_test(worker) + end + def test_source bug5484 = '[ruby-core:40364]' assert_equal('', //.source) -- cgit v1.2.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/