[前][次][番号順一覧][スレッド一覧]

ruby-changes:74225

From: Yusuke <ko1@a...>
Date: Mon, 24 Oct 2022 18:03:47 +0900 (JST)
Subject: [ruby-changes:74225] b51b22513f (master): Fix per-instance Regexp timeout (#6621)

https://git.ruby-lang.org/ruby.git/commit/?id=b51b22513f

From b51b22513f6a2a514a9e614e7a9f6e0df6c0c985 Mon Sep 17 00:00:00 2001
From: Yusuke Endoh <mame@r...>
Date: Mon, 24 Oct 2022 18:03:26 +0900
Subject: Fix per-instance Regexp timeout (#6621)

Fix per-instance Regexp timeout

This makes it follow what was decided in [Bug #19055]:

* `Regexp.new(str, timeout: nil)` should respect the global timeout
* `Regexp.new(str, timeout: huge_val)` should use the maximum value that
  can be represented in the internal representation
* `Regexp.new(str, timeout: 0 or negative value)` should raise an error
---
 hrtime.h                 |  1 +
 re.c                     | 10 +++++--
 test/ruby/test_regexp.rb | 73 ++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/hrtime.h b/hrtime.h
index 80aff5deb3..7ed4e6b04c 100644
--- a/hrtime.h
+++ b/hrtime.h
@@ -206,6 +206,7 @@ double2hrtime(rb_hrtime_t *hrt, double d) https://github.com/ruby/ruby/blob/trunk/hrtime.h#L206
     const double TIMESPEC_SEC_MAX_PLUS_ONE = 2.0 * (TIMESPEC_SEC_MAX_as_double / 2.0 + 1.0);
 
     if (TIMESPEC_SEC_MAX_PLUS_ONE <= d) {
+        *hrt = RB_HRTIME_MAX;
         return NULL;
     }
     else if (d <= 0) {
diff --git a/re.c b/re.c
index e16e631a15..c1214f392a 100644
--- a/re.c
+++ b/re.c
@@ -3772,6 +3772,8 @@ str_to_option(VALUE str) https://github.com/ruby/ruby/blob/trunk/re.c#L3772
  *  If optional keyword argument +timeout+ is given,
  *  its float value overrides the timeout interval for the class,
  *  Regexp.timeout.
+ *  If +nil+ is passed as +timeout, it uses the timeout interval
+ *  for the class, Regexp.timeout.
  *
  *  With argument +regexp+ given, returns a new regexp. The source,
  *  options, timeout are the same as +regexp+. +options+ and +n_flag+
@@ -3847,7 +3849,9 @@ rb_reg_initialize_m(int argc, VALUE *argv, VALUE self) https://github.com/ruby/ruby/blob/trunk/re.c#L3849
 
     {
         double limit = NIL_P(timeout) ? 0.0 : NUM2DBL(timeout);
-        if (limit < 0) limit = 0;
+        if (!NIL_P(timeout) && limit <= 0) {
+            rb_raise(rb_eArgError, "invalid timeout: %"PRIsVALUE, timeout);
+        }
         double2hrtime(&reg->timelimit, limit);
     }
 
@@ -4478,7 +4482,9 @@ rb_reg_s_timeout_set(VALUE dummy, VALUE limit) https://github.com/ruby/ruby/blob/trunk/re.c#L4482
 
     rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors");
 
-    if (timeout < 0) timeout = 0;
+    if (!NIL_P(limit) && timeout <= 0) {
+        rb_raise(rb_eArgError, "invalid timeout: %"PRIsVALUE, limit);
+    }
     double2hrtime(&rb_reg_match_time_limit, timeout);
 
     return limit;
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 7d7d7e5180..3d3cdbb46e 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -1579,7 +1579,7 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L1579
   end
 
   def test_s_timeout
-    assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
+    assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
     begin;
       timeout = EnvUtil.apply_timeout_scale(0.2)
 
@@ -1597,15 +1597,42 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L1597
     end;
   end
 
-  def test_timeout
-    assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
+  def test_s_timeout_corner_cases
+    assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
     begin;
-      dummy_timeout = EnvUtil.apply_timeout_scale(10)
-      timeout = EnvUtil.apply_timeout_scale(0.2)
+      assert_nil(Regexp.timeout)
+
+      # This is just an implementation detail that users should not depend on:
+      # If Regexp.timeout is set to a value greater than the value that can be
+      # represented in the internal representation of timeout, it uses the
+      # maximum value that can be represented.
+      Regexp.timeout = Float::INFINITY
+      assert_equal(((1<<64)-1) / 1000000000.0, Regexp.timeout)
 
-      Regexp.timeout = dummy_timeout # This should be ignored
+      Regexp.timeout = 1e300
+      assert_equal(((1<<64)-1) / 1000000000.0, Regexp.timeout)
 
-      re = Regexp.new("^a*b?a*$", timeout: timeout)
+      assert_raise(ArgumentError) { Regexp.timeout = 0 }
+      assert_raise(ArgumentError) { Regexp.timeout = -1 }
+
+      Regexp.timeout = nil
+      assert_nil(Regexp.timeout)
+    end;
+  end
+
+  def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout)
+    assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
+      global_timeout = #{ global_timeout.inspect }
+      per_instance_timeout = #{ per_instance_timeout.inspect }
+      expected_timeout = #{ expected_timeout.inspect }
+    begin;
+      global_timeout = EnvUtil.apply_timeout_scale(global_timeout)
+      per_instance_timeout = EnvUtil.apply_timeout_scale(per_instance_timeout)
+
+      Regexp.timeout = global_timeout
+
+      re = Regexp.new("^a*b?a*$", timeout: per_instance_timeout)
+      assert_equal(per_instance_timeout, re.timeout)
 
       t = Time.now
       assert_raise_with_message(Regexp::TimeoutError, "regexp match timeout") do
@@ -1613,7 +1640,37 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L1640
       end
       t = Time.now - t
 
-      assert_in_delta(timeout, t, timeout / 2)
+      assert_in_delta(expected_timeout, t, expected_timeout / 2)
+    end;
+  end
+
+  def test_timeout_shorter_than_global
+    per_instance_redos_test(10, 0.2, 0.2)
+  end
+
+  def test_timeout_longer_than_global
+    per_instance_redos_test(0.01, 0.5, 0.5)
+  end
+
+  def test_timeout_nil
+    per_instance_redos_test(0.5, nil, 0.5)
+  end
+
+  def test_timeout_corner_cases
+    assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
+    begin;
+      assert_nil(//.timeout)
+
+      # This is just an implementation detail that users should not depend on:
+      # If Regexp.timeout is set to a value greater than the value that can be
+      # represented in the internal representation of timeout, it uses the
+      # maximum value that can be represented.
+      assert_equal(((1<<64)-1) / 1000000000.0, Regexp.new("foo", timeout: Float::INFINITY).timeout)
+
+      assert_equal(((1<<64)-1) / 1000000000.0, Regexp.new("foo", timeout: 1e300).timeout)
+
+      assert_raise(ArgumentError) { Regexp.new("foo", timeout: 0) }
+      assert_raise(ArgumentError) { Regexp.new("foo", timeout: -1) }
     end;
   end
 end
-- 
cgit v1.2.3


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]