ruby-changes:66194
From: Jeremy <ko1@a...>
Date: Thu, 13 May 2021 10:55:58 +0900 (JST)
Subject: [ruby-changes:66194] 11ae581a4a (master): Fix handling of control/meta escapes in literal regexps
https://git.ruby-lang.org/ruby.git/commit/?id=11ae581a4a From 11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 Mon Sep 17 00:00:00 2001 From: Jeremy Evans <code@j...> Date: Wed, 12 May 2021 12:37:55 -0700 Subject: Fix handling of control/meta escapes in literal regexps Ruby uses a recursive algorithm for handling control/meta escapes in strings (read_escape). However, the equivalent code for regexps (tokadd_escape) did not use a recursive algorithm. Due to this, handling of control/meta escapes in regexp did not have the same behavior as in strings, leading to behavior such as the following returning nil: ```ruby /\c\xFF/ =~ "\c\xFF" ``` Switch the code for handling \c, \C and \M in literal regexps to use the same code as for strings (read_escape), to keep behavior consistent between the two. Fixes [Bug #14367] --- parse.y | 50 +++++++++---------------- spec/ruby/language/regexp/interpolation_spec.rb | 2 +- test/ruby/test_regexp.rb | 18 +++++++++ 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/parse.y b/parse.y index fc1fbd7..66813e5 100644 --- a/parse.y +++ b/parse.y @@ -6902,10 +6902,8 @@ static int https://github.com/ruby/ruby/blob/trunk/parse.y#L6902 tokadd_escape(struct parser_params *p, rb_encoding **encp) { int c; - int flags = 0; size_t numlen; - first: switch (c = nextc(p)) { case '\n': return 0; /* just ignore */ @@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp) https://github.com/ruby/ruby/blob/trunk/parse.y#L6926 } return 0; - case 'M': - if (flags & ESCAPE_META) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - flags |= ESCAPE_META; - goto escaped; - - case 'C': - if (flags & ESCAPE_CONTROL) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - goto escaped; - - case 'c': - if (flags & ESCAPE_CONTROL) goto eof; - tokcopy(p, 2); - flags |= ESCAPE_CONTROL; - escaped: - if ((c = nextc(p)) == '\\') { - goto first; - } - else if (c 
== -1) goto eof; - tokadd(p, c); - return 0; - eof: case -1: yyerror0("Invalid escape character syntax"); @@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p, https://github.com/ruby/ruby/blob/trunk/parse.y#L7118 goto non_ascii; } if (func & STR_FUNC_REGEXP) { + switch (c) { + case 'c': + case 'C': + case 'M': { + pushback(p, c); + c = read_escape(p, 0, enc); + + int i; + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", c); + for(i = 0; i < 4; i++) { + tokadd(p, escbuf[i]); + } + continue; + } + } + if (c == term && !simple_re_meta(c)) { tokadd(p, c); continue; diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb index ed0b724..6951fd3 100644 --- a/spec/ruby/language/regexp/interpolation_spec.rb +++ b/spec/ruby/language/regexp/interpolation_spec.rb @@ -36,7 +36,7 @@ describe "Regexps with interpolation" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/language/regexp/interpolation_spec.rb#L36 it "gives precedence to escape sequences over substitution" do str = "J" - /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})' + /\c#{str}/.to_s.should include('{str}') end it "throws RegexpError for malformed interpolation" do diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index a9e0cdf..5ba50b3 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L496 assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") } end + def test_match_control_meta_escape + assert_equal(0, /\c\xFF/ =~ "\c\xFF") + assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF") + assert_equal(0, /\C-\xFF/ =~ "\C-\xFF") + assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF") + assert_equal(0, /\M-\xFF/ =~ "\M-\xFF") + assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF") + assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF") + + assert_nil(/\c\xFE/ =~ "\c\xFF") + assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF") + 
assert_nil(/\C-\xFE/ =~ "\C-\xFF") + assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF") + assert_nil(/\M-\xFE/ =~ "\M-\xFF") + assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF") + assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF") + end + def test_unescape assert_raise(ArgumentError) { s = '\\'; /#{ s }/ } assert_equal(/\xFF/n, /#{ s="\\xFF" }/n) -- cgit v1.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/