ruby-changes:56373

https://git.ruby-lang.org/ruby.git/commit/?id=d746a41e85

From d746a41e85b746a90eef20c46d24880fe084ffc5 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Fri, 5 Jul 2019 22:18:08 +0900
Subject: Multiple codepoints are not allowed at single character literal

It has unintentionally passed since 2.5.

diff --git a/parse.y b/parse.y
index 9ec262c..3708601 100644
--- a/parse.y
+++ b/parse.y
@@ -6246,24 +6246,28 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6246
 /* return value is for ?\u3042 */
 static void
 tokadd_utf8(struct parser_params *p, rb_encoding **encp,
-	    int string_literal, int symbol_literal, int regexp_literal)
+	    int term, int symbol_literal, int regexp_literal)
 {
     /*
-     * If string_literal is true, then we allow multiple codepoints
-     * in \u{}, and add the codepoints to the current token.
-     * Otherwise we're parsing a character literal and return a single
-     * codepoint without adding it
+     * If `term` is not -1, then we allow multiple codepoints in \u{}
+     * upto `term` byte, otherwise we're parsing a character literal.
+     * And then add the codepoints to the current token.
      */
+    static const char multiple_codepoints[] = "Multiple codepoints at single character literal";
 
     const int open_brace = '{', close_brace = '}';
 
     if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
 
     if (peek(p, open_brace)) {  /* handle \u{...} form */
+	const char *second = NULL;
 	int c, last = nextc(p);
 	if (p->lex.pcur >= p->lex.pend) goto unterminated;
 	while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend);
 	while (c != close_brace) {
+	    if (c == term) goto unterminated;
+	    if (second == multiple_codepoints)
+		second = p->lex.pcur;
 	    if (regexp_literal) tokadd(p, last);
 	    if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) {
 		break;
@@ -6272,6 +6276,8 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6276
 		if (++p->lex.pcur >= p->lex.pend) goto unterminated;
 		last = c;
 	    }
+	    if (term == -1 && !second)
+		second = multiple_codepoints;
 	}
 
 	if (c != close_brace) {
@@ -6280,6 +6286,15 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6286
 	    yyerror0("unterminated Unicode escape");
 	    return;
 	}
+	if (second && second != multiple_codepoints) {
+	    const char *pcur = p->lex.pcur;
+	    p->lex.pcur = second;
+	    dispatch_scan_event(p, tSTRING_CONTENT);
+	    token_flush(p);
+	    p->lex.pcur = pcur;
+	    yyerror0(multiple_codepoints);
+	    token_flush(p);
+	}
 
 	if (regexp_literal) tokadd(p, close_brace);
 	nextc(p);
diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index 4a0c296..75f0896 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -577,6 +577,7 @@ class TestParse < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L577
     assert_equal("\u{1234}", eval("?\u{1234}"))
     assert_equal("\u{1234}", eval('?\u{1234}'))
     assert_equal("\u{1234}", eval('?\u1234'))
+    assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
     e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
     assert_not_match(/end-of-input/, e.message)
 
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/