ruby-changes:21112
From: nobu <ko1@a...>
Date: Fri, 2 Sep 2011 09:58:25 +0900 (JST)
Subject: [ruby-changes:21112] nobu:r33161 (trunk, ruby_1_9_3): * parse.y (parser_tokadd_string, parser_yylex): ignore a backslash
nobu 2011-09-02 09:58:10 +0900 (Fri, 02 Sep 2011) New Revision: 33161 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=33161 Log: * parse.y (parser_tokadd_string, parser_yylex): ignore a backslash which prefixes a non-ascii character, which has no escape syntax. [ruby-core:39222] [Ruby 1.9 - Bug #5262] Modified files: branches/ruby_1_9_3/ChangeLog branches/ruby_1_9_3/parse.y branches/ruby_1_9_3/test/ruby/test_literal.rb trunk/ChangeLog trunk/parse.y trunk/test/ruby/test_literal.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 33160) +++ ChangeLog (revision 33161) @@ -1,3 +1,9 @@ +Fri Sep 2 09:58:08 2011 Nobuyoshi Nakada <nobu@r...> + + * parse.y (parser_tokadd_string, parser_yylex): ignore a backslash + which prefixes an non-ascii character, which has no escape + syntax. [ruby-core:39222] [Ruby 1.9 - Bug #5262] + Fri Sep 2 04:05:25 2011 Aaron Patterson <aaron@t...> * ext/psych/lib/psych/visitors/yaml_tree.rb: emit strings tagged as Index: parse.y =================================================================== --- parse.y (revision 33160) +++ parse.y (revision 33161) @@ -5411,6 +5411,7 @@ } #define lex_goto_eol(parser) ((parser)->parser_lex_p = (parser)->parser_lex_pend) +#define lex_eol_p() (lex_p >= lex_pend) #define peek(c) peek_n((c), 0) #define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n]) @@ -5920,6 +5921,8 @@ continue; default: + if (c == -1) return -1; + if (!ISASCII(c)) goto non_ascii; if (func & STR_FUNC_REGEXP) { pushback(c); if ((c = tokadd_escape(&enc)) < 0) @@ -5945,6 +5948,7 @@ } } else if (!parser_isascii()) { + non_ascii: has_nonascii = 1; if (enc != *encp) { mixed_error(enc, *encp); @@ -7003,6 +7007,10 @@ tokadd(c); } } + else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) { + nextc(); + if (tokadd_mbchar(c) == -1) return 0; + } else { c = read_escape(0, &enc); tokadd(c); Index: test/ruby/test_literal.rb 
=================================================================== --- test/ruby/test_literal.rb (revision 33160) +++ test/ruby/test_literal.rb (revision 33161) @@ -53,15 +53,33 @@ assert_equal "3", "\x33" assert_equal "\n", "\n" bug2500 = '[ruby-core:27228]' + bug5262 = '[ruby-core:39222]' %w[c C- M-].each do |pre| ["u", %w[u{ }]].each do |open, close| - str = "\"\\#{pre}\\#{open}5555#{close}\"" - assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + ["?", ['"', '"']].each do |qopen, qclose| + str = "#{qopen}\\#{pre}\\#{open}5555#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("euc-jp") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("iso-8859-13") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\xe2\x7f#{close}#{qclose}".force_encoding("utf-8") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + end end end assert_equal "\x13", "\c\x33" assert_equal "\x13", "\C-\x33" assert_equal "\xB3", "\M-\x33" + assert_equal "\u201c", eval(%["\\\u{201c}"]), bug5262 + assert_equal "\u201c".encode("euc-jp"), eval(%["\\\u{201c}"].encode("euc-jp")), bug5262 + assert_equal "\u201c".encode("iso-8859-13"), eval(%["\\\u{201c}"].encode("iso-8859-13")), bug5262 end def test_dstring Index: ruby_1_9_3/ChangeLog =================================================================== --- ruby_1_9_3/ChangeLog (revision 33160) +++ ruby_1_9_3/ChangeLog (revision 33161) @@ -1,3 +1,9 @@ +Fri Sep 2 09:58:08 2011 Nobuyoshi Nakada <nobu@r...> + + * parse.y (parser_tokadd_string, parser_yylex): ignore a backslash + which prefixes an non-ascii character, which has no escape + syntax. 
[ruby-core:39222] [Ruby 1.9 - Bug #5262] + Thu Sep 1 17:31:22 2011 Nobuyoshi Nakada <nobu@r...> * insns.def (defineclass), vm_insnhelper.c (vm_get_cvar_base): see Index: ruby_1_9_3/parse.y =================================================================== --- ruby_1_9_3/parse.y (revision 33160) +++ ruby_1_9_3/parse.y (revision 33161) @@ -5411,6 +5411,7 @@ } #define lex_goto_eol(parser) ((parser)->parser_lex_p = (parser)->parser_lex_pend) +#define lex_eol_p() (lex_p >= lex_pend) #define peek(c) peek_n((c), 0) #define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n]) @@ -5920,6 +5921,8 @@ continue; default: + if (c == -1) return -1; + if (!ISASCII(c)) goto non_ascii; if (func & STR_FUNC_REGEXP) { pushback(c); if ((c = tokadd_escape(&enc)) < 0) @@ -5945,6 +5948,7 @@ } } else if (!parser_isascii()) { + non_ascii: has_nonascii = 1; if (enc != *encp) { mixed_error(enc, *encp); @@ -7003,6 +7007,10 @@ tokadd(c); } } + else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) { + nextc(); + if (tokadd_mbchar(c) == -1) return 0; + } else { c = read_escape(0, &enc); tokadd(c); Index: ruby_1_9_3/test/ruby/test_literal.rb =================================================================== --- ruby_1_9_3/test/ruby/test_literal.rb (revision 33160) +++ ruby_1_9_3/test/ruby/test_literal.rb (revision 33161) @@ -53,15 +53,33 @@ assert_equal "3", "\x33" assert_equal "\n", "\n" bug2500 = '[ruby-core:27228]' + bug5262 = '[ruby-core:39222]' %w[c C- M-].each do |pre| ["u", %w[u{ }]].each do |open, close| - str = "\"\\#{pre}\\#{open}5555#{close}\"" - assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + ["?", ['"', '"']].each do |qopen, qclose| + str = "#{qopen}\\#{pre}\\#{open}5555#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug2500} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}" + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = 
"#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("euc-jp") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\u201c#{close}#{qclose}".encode("iso-8859-13") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + + str = "#{qopen}\\#{pre}\\#{open}\xe2\x7f#{close}#{qclose}".force_encoding("utf-8") + assert_raise(SyntaxError, "#{bug5262} eval(#{str})") {eval(str)} + end end end assert_equal "\x13", "\c\x33" assert_equal "\x13", "\C-\x33" assert_equal "\xB3", "\M-\x33" + assert_equal "\u201c", eval(%["\\\u{201c}"]), bug5262 + assert_equal "\u201c".encode("euc-jp"), eval(%["\\\u{201c}"].encode("euc-jp")), bug5262 + assert_equal "\u201c".encode("iso-8859-13"), eval(%["\\\u{201c}"].encode("iso-8859-13")), bug5262 end def test_dstring -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/