[前][次][番号順一覧][スレッド一覧]

ruby-changes:68329

From: nagachika <ko1@a...>
Date: Sat, 9 Oct 2021 15:37:22 +0900 (JST)
Subject: [ruby-changes:68329] cfad0583eb (ruby_3_0): merge revision(s) abc0304cb28cb9dcc3476993bc487884c139fd11: [Backport #17507]

https://git.ruby-lang.org/ruby.git/commit/?id=cfad0583eb

From cfad0583eb18bb4505f28ee5cfab703a6b9987be Mon Sep 17 00:00:00 2001
From: nagachika <nagachika@r...>
Date: Sat, 9 Oct 2021 15:36:53 +0900
Subject: merge revision(s) abc0304cb28cb9dcc3476993bc487884c139fd11: [Backport
 #17507]

	Avoid race condition in Regexp#match

	In certain conditions, Regexp#match could return a MatchData with
	missing captures.  This seems to require at the least, multiple
	threads calling a method that calls the same block/proc/lambda
	which calls Regexp#match.

	The race condition happens because the MatchData is passed from
	indirectly via the backref, and other threads can modify the
	backref.

	Fix the issue by:

	1. Not reusing the existing MatchData from the backref, and always
	   allocating a new MatchData.
	2. Passing the MatchData directly to the caller using a VALUE*,
	   instead of indirectly through the backref.

	It's likely that variants of this issue exist for other Regexp
	methods.  Anywhere that MatchData is passed implicitly through
	the backref is probably vulnerable to this issue.

	Fixes [Bug #17507]
	---
	 re.c                     | 46 +++++++++++++++++++---------------------------
	 test/ruby/test_regexp.rb | 21 +++++++++++++++++++++
	 2 files changed, 40 insertions(+), 27 deletions(-)
---
 re.c                     | 46 +++++++++++++++++++---------------------------
 test/ruby/test_regexp.rb | 21 +++++++++++++++++++++
 version.h                |  2 +-
 3 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/re.c b/re.c
index f3e8954185..5b6f129963 100644
--- a/re.c
+++ b/re.c
@@ -1546,8 +1546,8 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) https://github.com/ruby/ruby/blob/trunk/re.c#L1546
 }
 
 /* returns byte offset */
-long
-rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
+static long
+rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
 {
     long result;
     VALUE match;
@@ -1569,18 +1569,7 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1569
     tmpreg = reg != RREGEXP_PTR(re);
     if (!tmpreg) RREGEXP(re)->usecnt++;
 
-    match = rb_backref_get();
-    if (!NIL_P(match)) {
-	if (FL_TEST(match, MATCH_BUSY)) {
-	    match = Qnil;
-	}
-	else {
-	    regs = RMATCH_REGS(match);
-	}
-    }
-    if (NIL_P(match)) {
-	MEMZERO(regs, struct re_registers, 1);
-    }
+    MEMZERO(regs, struct re_registers, 1);
     if (!reverse) {
 	range += len;
     }
@@ -1613,13 +1602,10 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1602
 	}
     }
 
-    if (NIL_P(match)) {
-	int err;
-	match = match_alloc(rb_cMatch);
-	err = rb_reg_region_copy(RMATCH_REGS(match), regs);
-	onig_region_free(regs, 0);
-	if (err) rb_memerror();
-    }
+    match = match_alloc(rb_cMatch);
+    int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs);
+    onig_region_free(regs, 0);
+    if (copy_err) rb_memerror();
 
     if (set_backref_str) {
 	RMATCH(match)->str = rb_str_new4(str);
@@ -1627,10 +1613,17 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) https://github.com/ruby/ruby/blob/trunk/re.c#L1613
 
     RMATCH(match)->regexp = re;
     rb_backref_set(match);
+    if (set_match) *set_match = match;
 
     return result;
 }
 
+long
+rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
+{
+    return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
+}
+
 long
 rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
 {
@@ -3127,7 +3120,7 @@ reg_operand(VALUE s, int check) https://github.com/ruby/ruby/blob/trunk/re.c#L3120
 }
 
 static long
-reg_match_pos(VALUE re, VALUE *strp, long pos)
+reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
 {
     VALUE str = *strp;
 
@@ -3146,7 +3139,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) https://github.com/ruby/ruby/blob/trunk/re.c#L3139
 	}
 	pos = rb_str_offset(str, pos);
     }
-    return rb_reg_search(re, str, pos, 0);
+    return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
 }
 
 /*
@@ -3200,7 +3193,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) https://github.com/ruby/ruby/blob/trunk/re.c#L3193
 VALUE
 rb_reg_match(VALUE re, VALUE str)
 {
-    long pos = reg_match_pos(re, &str, 0);
+    long pos = reg_match_pos(re, &str, 0, NULL);
     if (pos < 0) return Qnil;
     pos = rb_str_sublen(str, pos);
     return LONG2FIX(pos);
@@ -3311,7 +3304,7 @@ rb_reg_match2(VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L3304
 static VALUE
 rb_reg_match_m(int argc, VALUE *argv, VALUE re)
 {
-    VALUE result, str, initpos;
+    VALUE result = Qnil, str, initpos;
     long pos;
 
     if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
@@ -3321,12 +3314,11 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re) https://github.com/ruby/ruby/blob/trunk/re.c#L3314
 	pos = 0;
     }
 
-    pos = reg_match_pos(re, &str, pos);
+    pos = reg_match_pos(re, &str, pos, &result);
     if (pos < 0) {
 	rb_backref_set(Qnil);
 	return Qnil;
     }
-    result = rb_backref_get();
     rb_match_busy(result);
     if (!NIL_P(result) && rb_block_given_p()) {
 	return rb_yield(result);
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index bc98170dcc..35d20eeda6 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -265,6 +265,27 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L265
     assert_equal(re, re.match("foo").regexp)
   end
 
+  def test_match_lambda_multithread
+    bug17507 = "[ruby-core:101901]"
+    str = "a-x-foo-bar-baz-z-b"
+
+    worker = lambda do
+      m = /foo-([A-Za-z0-9_\.]+)-baz/.match(str)
+      assert_equal("bar", m[1], bug17507)
+
+      # These two lines are needed to trigger the bug
+      File.exist? "/tmp"
+      str.gsub(/foo-bar-baz/, "foo-abc-baz")
+    end
+
+    def self. threaded_test(worker)
+      6.times.map {Thread.new {10_000.times {worker.call}}}.each(&:join)
+    end
+
+    # The bug only occurs in a method calling a block/proc/lambda
+    threaded_test(worker)
+  end
+
   def test_source
     bug5484 = '[ruby-core:40364]'
     assert_equal('', //.source)
diff --git a/version.h b/version.h
index 22a9985392..0f6d8b6c05 100644
--- a/version.h
+++ b/version.h
@@ -12,7 +12,7 @@ https://github.com/ruby/ruby/blob/trunk/version.h#L12
 # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
 #define RUBY_VERSION_TEENY 3
 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 142
+#define RUBY_PATCHLEVEL 143
 
 #define RUBY_RELEASE_YEAR 2021
 #define RUBY_RELEASE_MONTH 10
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]