[前][次][番号順一覧][スレッド一覧]

ruby-changes:71556

From: Yusuke <ko1@a...>
Date: Wed, 30 Mar 2022 16:51:03 +0900 (JST)
Subject: [ruby-changes:71556] ffc3b37f96 (master): re.c: Add Regexp.timeout= and Regexp.timeout

https://git.ruby-lang.org/ruby.git/commit/?id=ffc3b37f96

From ffc3b37f969a779f93b8f8a5b3591b4ef7de1538 Mon Sep 17 00:00:00 2001
From: Yusuke Endoh <mame@r...>
Date: Thu, 24 Mar 2022 16:59:11 +0900
Subject: re.c: Add Regexp.timeout= and Regexp.timeout

[Feature #17837]
---
 include/ruby/onigmo.h    |  7 ++++
 re.c                     | 88 ++++++++++++++++++++++++++++++++++++++++++++++++
 regcomp.c                |  3 ++
 regexec.c                |  2 ++
 regint.h                 |  8 +++++
 test/ruby/test_regexp.rb | 17 ++++++++++
 6 files changed, 125 insertions(+)

diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index 6187b37dc3..a7ef59c7c8 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -793,6 +793,13 @@ typedef struct re_pattern_buffer { https://github.com/ruby/ruby/blob/trunk/include/ruby/onigmo.h#L793
   OnigDistance   dmin;                      /* min-distance of exact or map */
   OnigDistance   dmax;                      /* max-distance of exact or map */
 
+  /* rb_hrtime_t from hrtime.h */
+#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
+  int128_t timelimit;
+#else
+  uint64_t timelimit;
+#endif
+
   /* regex_t link chain */
   struct re_pattern_buffer* chain;  /* escape compile-conflict */
 } OnigRegexType;
diff --git a/re.c b/re.c
index 9c1adbb0ff..50fa140fd0 100644
--- a/re.c
+++ b/re.c
@@ -14,12 +14,14 @@ https://github.com/ruby/ruby/blob/trunk/re.c#L14
 #include <ctype.h>
 
 #include "encindex.h"
+#include "hrtime.h"
 #include "internal.h"
 #include "internal/hash.h"
 #include "internal/imemo.h"
 #include "internal/re.h"
 #include "internal/string.h"
 #include "internal/variable.h"
+#include "ractor_core.h"
 #include "regint.h"
 #include "ruby/encoding.h"
 #include "ruby/re.h"
@@ -1593,6 +1595,9 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) https://github.com/ruby/ruby/blob/trunk/re.c#L1595
 	rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
     }
 
+    // inherit the timeout settings
+    rb_hrtime_t timelimit = reg->timelimit;
+
     const char *ptr;
     long len;
     RSTRING_GETMEM(unescaped, ptr, len);
@@ -1604,6 +1609,8 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) https://github.com/ruby/ruby/blob/trunk/re.c#L1609
 	rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
     }
 
+    reg->timelimit = timelimit;
+
     RB_GC_GUARD(unescaped);
     return reg;
 }
@@ -4091,6 +4098,84 @@ re_warn(const char *s) https://github.com/ruby/ruby/blob/trunk/re.c#L4098
     rb_warn("%s", s);
 }
 
+// The process-global timeout for regexp matching (rb_hrtime_t; 0 = no default timeout)
+rb_hrtime_t rb_reg_match_time_limit = 0;
+
+// Periodic timeout check for regexp matching (invoked from regint.h once the match counter reaches 128)
+void
+rb_reg_check_timeout(regex_t *reg, void *end_time_)
+{
+    rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;  // the call site passes &msa->end_time
+
+    if (*end_time == 0) {
+        // 0 is the "deadline not computed yet" value set by MATCH_ARG_INIT,
+        // so this is the first check of the current match: compute the
+        // deadline now (or record "no deadline") without raising.
+        rb_hrtime_t timelimit = reg->timelimit;
+
+        if (!timelimit) {
+            // no per-object timeout; fall back to the process-global default
+            timelimit = rb_reg_match_time_limit;
+        }
+
+        if (timelimit) {
+            *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
+        }
+        else {
+            // no timeout is set; RB_HRTIME_MAX also keeps *end_time non-zero
+            *end_time = RB_HRTIME_MAX;
+        }
+    }
+    else {
+        if (*end_time < rb_hrtime_now()) {
+            // the deadline has passed: raise RuntimeError
+            rb_raise(rb_eRuntimeError, "regexp match timeout");
+        }
+    }
+}
+
+/*
+ *  call-seq:
+ *     Regexp.timeout  -> float or nil
+ *
+ *  It returns the current default timeout interval for Regexp matching in seconds.
+ *  +nil+ means no default timeout configuration.
+ */
+
+static VALUE
+rb_reg_s_timeout_get(VALUE dummy)
+{
+    double d = hrtime2double(rb_reg_match_time_limit);
+    if (d == 0.0) return Qnil;  // a stored limit of 0 means "no default timeout"
+    return DBL2NUM(d);
+}
+
+/*
+ *  call-seq:
+ *     Regexp.timeout = int or float or nil
+ *
+ *  It sets the default timeout interval for Regexp matching in seconds.
+ *  +nil+, zero, or a negative value means no default timeout configuration.
+ *  This configuration is process-global. If you want to set timeout for
+ *  each Regexp, use +timeout+ keyword for <code>Regexp.new</code>.
+ *
+ *     Regexp.timeout = 1
+ *     /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError)
+ */
+
+static VALUE
+rb_reg_s_timeout_set(VALUE dummy, VALUE limit)
+{
+    double timeout = NIL_P(limit) ? 0.0 : NUM2DBL(limit);  // nil is stored as 0 (no timeout)
+
+    rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors");
+
+    if (timeout < 0) timeout = 0;  // negative values are clamped to 0 (no timeout)
+    double2hrtime(&rb_reg_match_time_limit, timeout);
+
+    return limit;  // the argument as given, not the clamped value
+}
+
 /*
  *  Document-class: RegexpError
  *
@@ -4170,6 +4255,9 @@ Init_Regexp(void) https://github.com/ruby/ruby/blob/trunk/re.c#L4255
     rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
     rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
 
+    rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
+    rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
+
     /* see Regexp.options and Regexp.new */
     rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
     /* see Regexp.options and Regexp.new */
diff --git a/regcomp.c b/regcomp.c
index d51163103e..3e65c9d2e3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5973,6 +5973,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option, https://github.com/ruby/ruby/blob/trunk/regcomp.c#L5973
   (reg)->name_table       = (void* )NULL;
 
   (reg)->case_fold_flag   = case_fold_flag;
+
+  (reg)->timelimit        = 0;
+
   return 0;
 }
 
diff --git a/regexec.c b/regexec.c
index da17c04a55..c77d48b1d9 100644
--- a/regexec.c
+++ b/regexec.c
@@ -422,6 +422,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) https://github.com/ruby/ruby/blob/trunk/regexec.c#L422
   (msa).gpos     = (arg_gpos);\
   (msa).best_len = ONIG_MISMATCH;\
   (msa).counter  = 0;\
+  (msa).end_time = 0;\
 } while(0)
 #else
 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
@@ -431,6 +432,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) https://github.com/ruby/ruby/blob/trunk/regexec.c#L432
   (msa).start    = (arg_start);\
   (msa).gpos     = (arg_gpos);\
   (msa).counter  = 0;\
+  (msa).end_time = 0;\
 } while(0)
 #endif
 
diff --git a/regint.h b/regint.h
index 0e9777cc1e..6c88f278c1 100644
--- a/regint.h
+++ b/regint.h
@@ -152,6 +152,7 @@ https://github.com/ruby/ruby/blob/trunk/regint.h#L152
   msa->counter++;                         \
   if (msa->counter >= 128) {              \
     msa->counter = 0;                     \
+    rb_reg_check_timeout(reg, &msa->end_time);  \
     rb_thread_check_ints();               \
   }                                       \
 } while(0)
@@ -877,6 +878,12 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/regint.h#L878
   int   state_check_buff_size;
 #endif
   int counter;
+  /* rb_hrtime_t from hrtime.h */
+#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL
+  int128_t end_time;
+#else
+  uint64_t end_time;
+#endif
 } OnigMatchArg;
 
 
@@ -942,6 +949,7 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c https://github.com/ruby/ruby/blob/trunk/regint.h#L949
 #ifdef RUBY
 extern size_t onig_memsize(const regex_t *reg);
 extern size_t onig_region_memsize(const struct re_registers *regs);
+void rb_reg_check_timeout(regex_t *reg, void *end_time);
 #endif
 
 RUBY_SYMBOL_EXPORT_END
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 94098a850d..7bcddc6e07 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -1457,4 +1457,21 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L1457
     }
     assert_empty(errs, msg)
   end
+
+  def test_s_timeout # Regexp.timeout= / Regexp.timeout round-trip, and a ReDoS-style match raising RuntimeError near the 0.2s limit
+    assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}")
+    begin;
+      Regexp.timeout = 0.2
+      assert_equal(0.2, Regexp.timeout)
+
+      t = Time.now
+      assert_raise_with_message(RuntimeError, "regexp match timeout") do
+        # A typical ReDoS case
+        /^(a*)*$/ =~ "a" * 1000000 + "x"
+      end
+      t = Time.now - t
+
+      assert_in_delta(0.2, t, 0.1)
+    end;
+  end
 end
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]