ruby-changes:55816
From: Nobuyoshi <ko1@a...>
Date: Fri, 24 May 2019 16:15:48 +0900 (JST)
Subject: [ruby-changes:55816] Nobuyoshi Nakada: 2893550452 (trunk): Mixed encoding error can continue to parse
https://git.ruby-lang.org/ruby.git/commit/?id=2893550452

From 2893550452f6f3cadb17c670da185813d7d0a835 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Fri, 24 May 2019 16:10:59 +0900
Subject: Mixed encoding error can continue to parse

diff --git a/parse.y b/parse.y
index 92e4d05..2b34e8a 100644
--- a/parse.y
+++ b/parse.y
@@ -6180,7 +6180,7 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6180
 }
 
 /* return value is for ?\u3042 */
-static int
+static void
 parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
                    int string_literal, int symbol_literal, int regexp_literal)
 {
@@ -6214,7 +6214,7 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6214
           unterminated:
             literal_flush(p, p->lex.pcur);
             yyerror0("unterminated Unicode escape");
-            return 0;
+            return;
         }
 
         if (regexp_literal) tokadd(p, close_brace);
@@ -6222,11 +6222,11 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, https://github.com/ruby/ruby/blob/trunk/parse.y#L6222
     }
     else {                      /* handle \uxxxx form */
         if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
-            return 0;
+            return;
         }
     }
 
-    return TRUE;
+    return;
 }
 
 #define ESCAPE_CONTROL 1
@@ -6568,11 +6568,9 @@ tokadd_string(struct parser_params *p, https://github.com/ruby/ruby/blob/trunk/parse.y#L6568
                         tokadd(p, '\\');
                         break;
                     }
-                    if (!parser_tokadd_utf8(p, enc, term,
-                                            func & STR_FUNC_SYMBOL,
-                                            func & STR_FUNC_REGEXP)) {
-                        continue;
-                    }
+                    parser_tokadd_utf8(p, enc, term,
+                                       func & STR_FUNC_SYMBOL,
+                                       func & STR_FUNC_REGEXP);
                     continue;
 
                   default:
@@ -8070,8 +8068,7 @@ parse_qmark(struct parser_params *p, int space_seen) https://github.com/ruby/ruby/blob/trunk/parse.y#L8068
         if (peek(p, 'u')) {
             nextc(p);
             enc = rb_utf8_encoding();
-            if (!parser_tokadd_utf8(p, &enc, -1, 0, 0))
-                return 0;
+            parser_tokadd_utf8(p, &enc, -1, 0, 0);
         }
         else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) {
             nextc(p);
diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index dc4c143..c59454f 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -562,6 +562,9 @@ class TestParse < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_parse.rb#L562
     assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) }
     assert_equal("\u{1234}", eval("?\u{1234}"))
     assert_equal("\u{1234}", eval('?\u{1234}'))
+    assert_equal("\u{1234}", eval('?\u1234'))
+    e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
+    assert_not_match(/end-of-input/, e.message)
   end
 
   def test_percent
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index 7bf1e0e..e640262 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -775,32 +775,39 @@ eom https://github.com/ruby/ruby/blob/trunk/test/ruby/test_syntax.rb#L775
   end
 
   def test_heredoc_mixed_encoding
-    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+    e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
       #encoding: cp932
       <<-TEXT
       \xe9\x9d\u1234
       TEXT
     HEREDOC
-    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+    assert_not_match(/end-of-input/, e.message)
+
+    e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
       #encoding: cp932
       <<-TEXT
       \xe9\x9d
       \u1234
       TEXT
     HEREDOC
-    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+    assert_not_match(/end-of-input/, e.message)
+
+    e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
       #encoding: cp932
       <<-TEXT
       \u1234\xe9\x9d
       TEXT
     HEREDOC
-    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+    assert_not_match(/end-of-input/, e.message)
+
+    e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
       #encoding: cp932
       <<-TEXT
       \u1234
       \xe9\x9d
       TEXT
     HEREDOC
+    assert_not_match(/end-of-input/, e.message)
   end
 
   def test_lineno_operation_brace_block
--
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/