ruby-changes:47302

nobu	2017-07-25 17:30:11 +0900 (Tue, 25 Jul 2017)

  New Revision: 59417

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59417

  Log:
    parse.y: limit codepoint length
    
    * parse.y (parser_tokadd_codepoint): limit Unicode codepoint
      length.  A codepoint escape that was too long has been split
      unexpectedly since r57050.

  Modified files:
    trunk/parse.y
    trunk/test/ruby/test_parse.rb
Index: parse.y
===================================================================
--- parse.y	(revision 59416)
+++ parse.y	(revision 59417)
@@ -5661,20 +5661,20 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5661
 			int regexp_literal, int wide)
 {
     size_t numlen;
-    int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen);
+    int codepoint = scan_hex(lex_p, wide ? lex_pend - lex_p : 4, &numlen);
     literal_flush(lex_p);
     lex_p += numlen;
-    if (wide ? (numlen == 0) : (numlen < 4))  {
+    if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4))  {
 	yyerror("invalid Unicode escape");
-	return FALSE;
+	return wide && numlen > 0;
     }
     if (codepoint > 0x10ffff) {
 	yyerror("invalid Unicode codepoint (too large)");
-	return FALSE;
+	return wide;
     }
     if ((codepoint & 0xfffff800) == 0xd800) {
 	yyerror("invalid Unicode codepoint");
-	return FALSE;
+	return wide;
     }
     if (regexp_literal) {
 	tokcopy((int)numlen);
@@ -5687,7 +5687,7 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5687
 	    char *mesg = alloca(len);
 	    snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp));
 	    yyerror(mesg);
-	    return TRUE;
+	    return wide;
 	}
 	*encp = utf8;
 	tokaddmbc(codepoint, *encp);
@@ -5718,7 +5718,7 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5718
 	int c, last = nextc();
 	if (lex_p >= lex_pend) goto unterminated;
 	while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend);
-	while (!string_literal || c != close_brace) {
+	do {
 	    if (regexp_literal) tokadd(last);
 	    if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) {
 		break;
@@ -5727,8 +5727,7 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5727
 		if (++lex_p >= lex_pend) goto unterminated;
 		last = c;
 	    }
-	    if (!string_literal) break;
-	}
+	} while (c != close_brace);
 
 	if (c != close_brace) {
 	  unterminated:
Index: test/ruby/test_parse.rb
===================================================================
--- test/ruby/test_parse.rb	(revision 59416)
+++ test/ruby/test_parse.rb	(revision 59417)
@@ -516,6 +516,8 @@ class TestParse < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L516
     src = '"\xD0\u{90'"\n""000000000000000000000000"
     assert_syntax_error(src, /:#{__LINE__}: unterminated/o)
 
+    assert_syntax_error('"\u{100000000}"', /invalid Unicode escape/)
+
     assert_equal("\x81", eval('"\C-\M-a"'))
     assert_equal("\177", eval('"\c?"'))
   end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/