ruby-changes:66194

https://git.ruby-lang.org/ruby.git/commit/?id=11ae581a4a

From 11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 Mon Sep 17 00:00:00 2001
From: Jeremy Evans <code@j...>
Date: Wed, 12 May 2021 12:37:55 -0700
Subject: Fix handling of control/meta escapes in literal regexps

Ruby uses a recursive algorithm for handling control/meta escapes
in strings (read_escape).  However, the equivalent code for regexps
(tokadd_escape) did not use a recursive algorithm.  Due to this,
handling of control/meta escapes in regexps did not have the same
behavior as in strings, leading to behavior such as the following
returning nil:

```ruby
/\c\xFF/ =~ "\c\xFF"
```

Switch the code for handling \c, \C and \M in literal regexps to
use the same code as for strings (read_escape), to keep behavior
consistent between the two.

Fixes [Bug #14367]
---
 parse.y                                         | 50 +++++++++----------------
 spec/ruby/language/regexp/interpolation_spec.rb |  2 +-
 test/ruby/test_regexp.rb                        | 18 +++++++++
 3 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/parse.y b/parse.y
index fc1fbd7..66813e5 100644
--- a/parse.y
+++ b/parse.y
@@ -6902,10 +6902,8 @@ static int https://github.com/ruby/ruby/blob/trunk/parse.y#L6902
 tokadd_escape(struct parser_params *p, rb_encoding **encp)
 {
     int c;
-    int flags = 0;
     size_t numlen;
 
-  first:
     switch (c = nextc(p)) {
       case '\n':
 	return 0;		/* just ignore */
@@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp) https://github.com/ruby/ruby/blob/trunk/parse.y#L6926
 	}
 	return 0;
 
-      case 'M':
-	if (flags & ESCAPE_META) goto eof;
-	if ((c = nextc(p)) != '-') {
-	    pushback(p, c);
-	    goto eof;
-	}
-	tokcopy(p, 3);
-	flags |= ESCAPE_META;
-	goto escaped;
-
-      case 'C':
-	if (flags & ESCAPE_CONTROL) goto eof;
-	if ((c = nextc(p)) != '-') {
-	    pushback(p, c);
-	    goto eof;
-	}
-	tokcopy(p, 3);
-	goto escaped;
-
-      case 'c':
-	if (flags & ESCAPE_CONTROL) goto eof;
-	tokcopy(p, 2);
-	flags |= ESCAPE_CONTROL;
-      escaped:
-	if ((c = nextc(p)) == '\\') {
-	    goto first;
-	}
-	else if (c == -1) goto eof;
-	tokadd(p, c);
-	return 0;
-
       eof:
       case -1:
         yyerror0("Invalid escape character syntax");
@@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p, https://github.com/ruby/ruby/blob/trunk/parse.y#L7118
 		    goto non_ascii;
 		}
 		if (func & STR_FUNC_REGEXP) {
+                    switch (c) {
+                      case 'c':
+                      case 'C':
+                      case 'M': {
+                        pushback(p, c);
+                        c = read_escape(p, 0, enc);
+
+                        int i;
+                        char escbuf[5];
+                        snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
+                        for(i = 0; i < 4; i++) {
+                            tokadd(p, escbuf[i]);
+                        }
+                        continue;
+                      }
+                    }
+
 		    if (c == term && !simple_re_meta(c)) {
 			tokadd(p, c);
 			continue;
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
index ed0b724..6951fd3 100644
--- a/spec/ruby/language/regexp/interpolation_spec.rb
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -36,7 +36,7 @@ describe "Regexps with interpolation" do https://github.com/ruby/ruby/blob/trunk/spec/ruby/language/regexp/interpolation_spec.rb#L36
 
   it "gives precedence to escape sequences over substitution" do
     str = "J"
-    /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
+    /\c#{str}/.to_s.should include('{str}')
   end
 
   it "throws RegexpError for malformed interpolation" do
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index a9e0cdf..5ba50b3 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L496
     assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
   end
 
+  def test_match_control_meta_escape
+    assert_equal(0, /\c\xFF/ =~ "\c\xFF")
+    assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF")
+    assert_equal(0, /\C-\xFF/ =~ "\C-\xFF")
+    assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF")
+    assert_equal(0, /\M-\xFF/ =~ "\M-\xFF")
+    assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF")
+    assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF")
+
+    assert_nil(/\c\xFE/ =~ "\c\xFF")
+    assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF")
+    assert_nil(/\C-\xFE/ =~ "\C-\xFF")
+    assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF")
+    assert_nil(/\M-\xFE/ =~ "\M-\xFF")
+    assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF")
+    assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF")
+  end
+
   def test_unescape
     assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
     assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)
-- 
cgit v1.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/