ruby-changes:71556
From: Yusuke <ko1@a...>
Date: Wed, 30 Mar 2022 16:51:03 +0900 (JST)
Subject: [ruby-changes:71556] ffc3b37f96 (master): re.c: Add Regexp.timeout= and Regexp.timeout
https://git.ruby-lang.org/ruby.git/commit/?id=ffc3b37f96 From ffc3b37f969a779f93b8f8a5b3591b4ef7de1538 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh <mame@r...> Date: Thu, 24 Mar 2022 16:59:11 +0900 Subject: re.c: Add Regexp.timeout= and Regexp.timeout [Feature #17837] --- include/ruby/onigmo.h | 7 ++++ re.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ regcomp.c | 3 ++ regexec.c | 2 ++ regint.h | 8 +++++ test/ruby/test_regexp.rb | 17 ++++++++++ 6 files changed, 125 insertions(+) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 6187b37dc3..a7ef59c7c8 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -793,6 +793,13 @@ typedef struct re_pattern_buffer { https://github.com/ruby/ruby/blob/trunk/include/ruby/onigmo.h#L793 OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ + /* rb_hrtime_t from hrtime.h */ +#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL + int128_t timelimit; +#else + uint64_t timelimit; +#endif + /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */ } OnigRegexType; diff --git a/re.c b/re.c index 9c1adbb0ff..50fa140fd0 100644 --- a/re.c +++ b/re.c @@ -14,12 +14,14 @@ https://github.com/ruby/ruby/blob/trunk/re.c#L14 #include <ctype.h> #include "encindex.h" +#include "hrtime.h" #include "internal.h" #include "internal/hash.h" #include "internal/imemo.h" #include "internal/re.h" #include "internal/string.h" #include "internal/variable.h" +#include "ractor_core.h" #include "regint.h" #include "ruby/encoding.h" #include "ruby/re.h" @@ -1593,6 +1595,9 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) https://github.com/ruby/ruby/blob/trunk/re.c#L1595 rb_raise(rb_eArgError, "regexp preprocess failed: %s", err); } + // inherit the timeout settings + rb_hrtime_t timelimit = reg->timelimit; + const char *ptr; long len; RSTRING_GETMEM(unescaped, ptr, len); @@ -1604,6 +1609,8 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) https://github.com/ruby/ruby/blob/trunk/re.c#L1609 rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re); } + reg->timelimit = timelimit; + RB_GC_GUARD(unescaped); return reg; } @@ -4091,6 +4098,84 @@ re_warn(const char *s) https://github.com/ruby/ruby/blob/trunk/re.c#L4098 rb_warn("%s", s); } +// The process-global timeout for regexp matching +rb_hrtime_t rb_reg_match_time_limit = 0; + +// This function is periodically called during regexp matching +void +rb_reg_check_timeout(regex_t *reg, void *end_time_) +{ + rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_; + + if (*end_time == 0) { + // This is the first time to check interrupts; + // just measure the current time and determine the end time + // if timeout is set. + rb_hrtime_t timelimit = reg->timelimit; + + if (!timelimit) { + // no per-object timeout. + timelimit = rb_reg_match_time_limit; + } + + if (timelimit) { + *end_time = rb_hrtime_add(timelimit, rb_hrtime_now()); + } + else { + // no timeout is set + *end_time = RB_HRTIME_MAX; + } + } + else { + if (*end_time < rb_hrtime_now()) { + // timeout is exceeded + rb_raise(rb_eRuntimeError, "regexp match timeout"); + } + } +} + +/* + * call-seq: + * Regexp.timeout -> int or float or nil + * + * It returns the current default timeout interval for Regexp matching in second. + * +nil+ means no default timeout configuration. + */ + +static VALUE +rb_reg_s_timeout_get(VALUE dummy) +{ + double d = hrtime2double(rb_reg_match_time_limit); + if (d == 0.0) return Qnil; + return DBL2NUM(d); +} + +/* + * call-seq: + * Regexp.timeout = int or float or nil + * + * It sets the default timeout interval for Regexp matching in second. + * +nil+ means no default timeout configuration. + * This configuration is process-global. If you want to set timeout for + * each Regexp, use +timeout+ keyword for <code>Regexp.new</code>. + * + * Regexp.timeout = 1 + * /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (RuntimeError) + */ + +static VALUE +rb_reg_s_timeout_set(VALUE dummy, VALUE limit) +{ + double timeout = NIL_P(limit) ? 0.0 : NUM2DBL(limit); + + rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors"); + + if (timeout < 0) timeout = 0; + double2hrtime(&rb_reg_match_time_limit, timeout); + + return limit; +} + /* * Document-class: RegexpError * @@ -4170,6 +4255,9 @@ Init_Regexp(void) https://github.com/ruby/ruby/blob/trunk/re.c#L4255 rb_define_method(rb_cRegexp, "names", rb_reg_names, 0); rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0); + rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0); + rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1); + /* see Regexp.options and Regexp.new */ rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE)); /* see Regexp.options and Regexp.new */ diff --git a/regcomp.c b/regcomp.c index d51163103e..3e65c9d2e3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5973,6 +5973,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option, https://github.com/ruby/ruby/blob/trunk/regcomp.c#L5973 (reg)->name_table = (void* )NULL; (reg)->case_fold_flag = case_fold_flag; + + (reg)->timelimit = 0; + return 0; } diff --git a/regexec.c b/regexec.c index da17c04a55..c77d48b1d9 100644 --- a/regexec.c +++ b/regexec.c @@ -422,6 +422,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) https://github.com/ruby/ruby/blob/trunk/regexec.c#L422 (msa).gpos = (arg_gpos);\ (msa).best_len = ONIG_MISMATCH;\ (msa).counter = 0;\ + (msa).end_time = 0;\ } while(0) #else # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ @@ -431,6 +432,7 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) https://github.com/ruby/ruby/blob/trunk/regexec.c#L432 (msa).start = (arg_start);\ (msa).gpos = (arg_gpos);\ (msa).counter = 0;\ + (msa).end_time = 0;\ } while(0) #endif diff --git a/regint.h b/regint.h index 0e9777cc1e..6c88f278c1 100644 --- a/regint.h +++ b/regint.h @@ -152,6 +152,7 @@ https://github.com/ruby/ruby/blob/trunk/regint.h#L152 msa->counter++; \ if (msa->counter >= 128) { \ msa->counter = 0; \ + rb_reg_check_timeout(reg, &msa->end_time); \ rb_thread_check_ints(); \ } \ } while(0) @@ -877,6 +878,12 @@ typedef struct { https://github.com/ruby/ruby/blob/trunk/regint.h#L878 int state_check_buff_size; #endif int counter; + /* rb_hrtime_t from hrtime.h */ +#ifdef MY_RUBY_BUILD_MAY_TIME_TRAVEL + int128_t end_time; +#else + uint64_t end_time; +#endif } OnigMatchArg; @@ -942,6 +949,7 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c https://github.com/ruby/ruby/blob/trunk/regint.h#L949 #ifdef RUBY extern size_t onig_memsize(const regex_t *reg); extern size_t onig_region_memsize(const struct re_registers *regs); +void rb_reg_check_timeout(regex_t *reg, void *end_time); #endif RUBY_SYMBOL_EXPORT_END diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 94098a850d..7bcddc6e07 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1457,4 +1457,21 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L1457 } assert_empty(errs, msg) end + + def test_s_timeout + assert_separately([], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + Regexp.timeout = 0.2 + assert_equal(0.2, Regexp.timeout) + + t = Time.now + assert_raise_with_message(RuntimeError, "regexp match timeout") do + # A typical ReDoS case + /^(a*)*$/ =~ "a" * 1000000 + "x" + end + t = Time.now - t + + assert_in_delta(0.2, t, 0.1) + end; + end end -- cgit v1.2.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/