ruby-changes:47302
From: nobu <ko1@a...>
Date: Tue, 25 Jul 2017 17:30:20 +0900 (JST)
Subject: [ruby-changes:47302] nobu:r59417 (trunk): parse.y: limit codepoint length
nobu	2017-07-25 17:30:11 +0900 (Tue, 25 Jul 2017)

  New Revision: 59417

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=59417

  Log:
    parse.y: limit codepoint length

    * parse.y (parser_tokadd_codepoint): limit Unicode codepoint
      length. too long codepoint has been split unexpectedly since
      r57050.

  Modified files:
    trunk/parse.y
    trunk/test/ruby/test_parse.rb

Index: parse.y
===================================================================
--- parse.y (revision 59416)
+++ parse.y (revision 59417)
@@ -5661,20 +5661,20 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5661
                         int regexp_literal, int wide)
 {
     size_t numlen;
-    int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen);
+    int codepoint = scan_hex(lex_p, wide ? lex_pend - lex_p : 4, &numlen);
     literal_flush(lex_p);
     lex_p += numlen;
-    if (wide ? (numlen == 0) : (numlen < 4)) {
+    if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4)) {
         yyerror("invalid Unicode escape");
-        return FALSE;
+        return wide && numlen > 0;
     }
     if (codepoint > 0x10ffff) {
         yyerror("invalid Unicode codepoint (too large)");
-        return FALSE;
+        return wide;
     }
     if ((codepoint & 0xfffff800) == 0xd800) {
         yyerror("invalid Unicode codepoint");
-        return FALSE;
+        return wide;
     }
     if (regexp_literal) {
         tokcopy((int)numlen);
@@ -5687,7 +5687,7 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5687
             char *mesg = alloca(len);
             snprintf(mesg, len, mixed_utf8, rb_enc_name(*encp));
             yyerror(mesg);
-            return TRUE;
+            return wide;
         }
         *encp = utf8;
         tokaddmbc(codepoint, *encp);
@@ -5718,7 +5718,7 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5718
         int c, last = nextc();
         if (lex_p >= lex_pend) goto unterminated;
         while (ISSPACE(c = *lex_p) && ++lex_p < lex_pend);
-        while (!string_literal || c != close_brace) {
+        do {
             if (regexp_literal) tokadd(last);
             if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) {
                 break;
@@ -5727,8 +5727,7 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5727
                 if (++lex_p >= lex_pend) goto unterminated;
                 last = c;
             }
-            if (!string_literal) break;
-        }
+        } while (c != close_brace);
 
         if (c != close_brace) {
           unterminated:
Index: test/ruby/test_parse.rb
===================================================================
--- test/ruby/test_parse.rb (revision 59416)
+++ test/ruby/test_parse.rb (revision 59417)
@@ -516,6 +516,8 @@ class TestParse < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L516
     src = '"\xD0\u{90'"\n""000000000000000000000000"
     assert_syntax_error(src, /:#{__LINE__}: unterminated/o)
 
+    assert_syntax_error('"\u{100000000}"', /invalid Unicode escape/)
+
     assert_equal("\x81", eval('"\C-\M-a"'))
     assert_equal("\177", eval('"\c?"'))
   end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/