ruby-changes:57278
From: usa <ko1@a...>
Date: Tue, 27 Aug 2019 00:36:55 +0900 (JST)
Subject: [ruby-changes:57278] usa: a9a3769530 (ruby_2_5): merge revision(s) 6375c68f8851e1e0fee8a95afba91c4555097127,c05eaa93258ddc01e685b6cc3a0da82998a2af48: [Backport #15839]
https://git.ruby-lang.org/ruby.git/commit/?id=a9a3769530 From a9a3769530072559d429fc3b7132fef4e32f27b2 Mon Sep 17 00:00:00 2001 From: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> Date: Mon, 26 Aug 2019 15:36:38 +0000 Subject: merge revision(s) 6375c68f8851e1e0fee8a95afba91c4555097127,c05eaa93258ddc01e685b6cc3a0da82998a2af48: [Backport #15839] parse.y: function parser_mixed_error & parser_mixed_escape Fix mixed encoding in heredoc Heredocs are parsed line-by-line, so we need to keep track of the temporary encoding of the string. Previously, a heredoc would only detect mixed encoding errors if they were on the same line, this changes things so they will be caught on different lines. Fixes [Bug #15839] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@67763 b2dd03c8-39d4-4d8f-98ff-823fe69b080e diff --git a/parse.y b/parse.y index 62428c9..8d83132 100644 --- a/parse.y +++ b/parse.y @@ -5141,7 +5141,7 @@ none : /* none */ https://github.com/ruby/ruby/blob/trunk/parse.y#L5141 # define yylval (*parser->lval) static int parser_regx_options(struct parser_params*); -static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); +static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**,rb_encoding**); static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); static enum yytokentype parser_parse_string(struct parser_params*,rb_strterm_literal_t*); static enum yytokentype parser_here_document(struct parser_params*,rb_strterm_heredoc_t*); @@ -5156,7 +5156,7 @@ static enum yytokentype parser_here_document(struct parser_params*,rb_strterm_he https://github.com/ruby/ruby/blob/trunk/parse.y#L5156 # define read_escape(flags,e) parser_read_escape(parser, (flags), (e)) # define tokadd_escape(e) parser_tokadd_escape(parser, (e)) # define regx_options() parser_regx_options(parser) -# define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,(f),(t),(p),(n),(e)) +# define 
tokadd_string(f,t,p,n,e,e2) parser_tokadd_string(parser,(f),(t),(p),(n),(e),(e2)) # define parse_string(n) parser_parse_string(parser,(n)) # define tokaddmbc(c, enc) parser_tokaddmbc(parser, (c), (enc)) # define here_document(n) parser_here_document(parser,(n)) @@ -6339,32 +6339,38 @@ parser_update_heredoc_indent(struct parser_params *parser, int c) https://github.com/ruby/ruby/blob/trunk/parse.y#L6339 return FALSE; } +static void +parser_mixed_error(struct parser_params *parser, rb_encoding *enc1, rb_encoding *enc2) +{ + static const char mixed_msg[] = "%s mixed within %s source"; + const char *n1 = rb_enc_name(enc1), *n2 = rb_enc_name(enc2); + const size_t len = sizeof(mixed_msg) - 4 + strlen(n1) + strlen(n2); + char *errbuf = ALLOCA_N(char, len); + snprintf(errbuf, len, mixed_msg, n1, n2); + yyerror0(errbuf); +} + +static void +parser_mixed_escape(struct parser_params *p, const char *beg, rb_encoding *enc1, rb_encoding *enc2) +{ + const char *pos = p->lex.pcur; + p->lex.pcur = beg; + parser_mixed_error(p, enc1, enc2); + p->lex.pcur = pos; +} + static int parser_tokadd_string(struct parser_params *parser, int func, int term, int paren, long *nest, - rb_encoding **encp) + rb_encoding **encp, rb_encoding **enc) { int c; - rb_encoding *enc = 0; - char *errbuf = 0; - static const char mixed_msg[] = "%s mixed within %s source"; + bool erred = false; -#define mixed_error(enc1, enc2) if (!errbuf) { \ - size_t len = sizeof(mixed_msg) - 4; \ - len += strlen(rb_enc_name(enc1)); \ - len += strlen(rb_enc_name(enc2)); \ - errbuf = ALLOCA_N(char, len); \ - snprintf(errbuf, len, mixed_msg, \ - rb_enc_name(enc1), \ - rb_enc_name(enc2)); \ - yyerror0(errbuf); \ - } -#define mixed_escape(beg, enc1, enc2) do { \ - const char *pos = lex_p; \ - lex_p = (beg); \ - mixed_error((enc1), (enc2)); \ - lex_p = pos; \ - } while (0) +#define mixed_error(enc1, enc2) \ + (void)(erred || (parser_mixed_error(parser, enc1, enc2), erred = true)) +#define mixed_escape(beg, enc1, enc2) \ + 
(void)(erred || (parser_mixed_escape(parser, beg, enc1, enc2), erred = true)) while ((c = nextc()) != -1) { if (heredoc_indent > 0) { @@ -6414,7 +6420,7 @@ parser_tokadd_string(struct parser_params *parser, https://github.com/ruby/ruby/blob/trunk/parse.y#L6420 tokadd('\\'); break; } - if (!parser_tokadd_utf8(parser, &enc, term, + if (!parser_tokadd_utf8(parser, enc, term, func & STR_FUNC_SYMBOL, func & STR_FUNC_REGEXP)) { return -1; @@ -6433,17 +6439,17 @@ parser_tokadd_string(struct parser_params *parser, https://github.com/ruby/ruby/blob/trunk/parse.y#L6439 continue; } pushback(c); - if ((c = tokadd_escape(&enc)) < 0) + if ((c = tokadd_escape(enc)) < 0) return -1; - if (enc && enc != *encp) { - mixed_escape(parser->tokp+2, enc, *encp); + if (*enc && *enc != *encp) { + mixed_escape(parser->tokp+2, *enc, *encp); } continue; } else if (func & STR_FUNC_EXPAND) { pushback(c); if (func & STR_FUNC_ESCAPE) tokadd('\\'); - c = read_escape(0, &enc); + c = read_escape(0, enc); } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { /* ignore backslashed spaces in %w */ @@ -6457,11 +6463,11 @@ parser_tokadd_string(struct parser_params *parser, https://github.com/ruby/ruby/blob/trunk/parse.y#L6463 } else if (!parser_isascii()) { non_ascii: - if (!enc) { - enc = *encp; + if (!*enc) { + *enc = *encp; } - else if (enc != *encp) { - mixed_error(enc, *encp); + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } if (tokadd_mbchar(c) == -1) return -1; @@ -6472,18 +6478,18 @@ parser_tokadd_string(struct parser_params *parser, https://github.com/ruby/ruby/blob/trunk/parse.y#L6478 break; } if (c & 0x80) { - if (!enc) { - enc = *encp; + if (!*enc) { + *enc = *encp; } - else if (enc != *encp) { - mixed_error(enc, *encp); + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } } tokadd(c); } terminate: - if (enc) *encp = enc; + if (*enc) *encp = *enc; return c; } @@ -6612,6 +6618,7 @@ parser_parse_string(struct parser_params *parser, rb_strterm_literal_t *quote) 
https://github.com/ruby/ruby/blob/trunk/parse.y#L6618 int paren = (int)quote->u2.paren; int c, space = 0; rb_encoding *enc = current_enc; + rb_encoding *base_enc = 0; VALUE lit; if (func & STR_FUNC_TERM) { @@ -6652,7 +6659,7 @@ parser_parse_string(struct parser_params *parser, rb_strterm_literal_t *quote) https://github.com/ruby/ruby/blob/trunk/parse.y#L6659 } pushback(c); if (tokadd_string(func, term, paren, &quote->u0.nest, - &enc) == -1) { + &enc, &base_enc) == -1) { if (parser->eofp) { #ifndef RIPPER # define unterminated_literal(mesg) yyerror0(mesg) @@ -6987,6 +6994,7 @@ parser_here_document(struct parser_params *parser, rb_strterm_heredoc_t *here) https://github.com/ruby/ruby/blob/trunk/parse.y#L6994 long len; VALUE str = 0; rb_encoding *enc = current_enc; + rb_encoding *base_enc = 0; int bol; eos = RSTRING_PTR(here->term); @@ -7099,7 +7107,8 @@ parser_here_document(struct parser_params *parser, rb_strterm_heredoc_t *here) https://github.com/ruby/ruby/blob/trunk/parse.y#L7107 } do { pushback(c); - if ((c = tokadd_string(func, '\n', 0, NULL, &enc)) == -1) { + enc = current_enc; + if ((c = tokadd_string(func, '\n', 0, NULL, &enc, &base_enc)) == -1) { if (parser->eofp) goto error; goto restore; } diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index b81ec90..f8d28a4 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -711,6 +711,35 @@ eom https://github.com/ruby/ruby/blob/trunk/test/ruby/test_syntax.rb#L711 assert_syntax_error('<<~ "#{}"', /unexpected <</) end + def test_heredoc_mixed_encoding + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \xe9\x9d\u1234 + TEXT + HEREDOC + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \xe9\x9d + \u1234 + TEXT + HEREDOC + assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \u1234\xe9\x9d + TEXT + HEREDOC +
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') + #encoding: cp932 + <<-TEXT + \u1234 + \xe9\x9d + TEXT + HEREDOC + end + def test_lineno_operation_brace_block expected = __LINE__ + 1 actual = caller_lineno\ diff --git a/version.h b/version.h index 522a552..3b5eea5 100644 --- a/version.h +++ b/version.h @@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/version.h#L1 #define RUBY_VERSION "2.5.6" #define RUBY_RELEASE_DATE "2019-08-27" -#define RUBY_PATCHLEVEL 181 +#define RUBY_PATCHLEVEL 182 #define RUBY_RELEASE_YEAR 2019 #define RUBY_RELEASE_MONTH 8 -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/