ruby-changes:44891
From: nobu <ko1@a...>
Date: Fri, 2 Dec 2016 12:34:00 +0900 (JST)
Subject: [ruby-changes:44891] nobu:r56964 (trunk): parse.y: simplify parsing utf-8 string
nobu 2016-12-02 12:33:54 +0900 (Fri, 02 Dec 2016)

  New Revision: 56964

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56964

  Log:
    parse.y: simplify parsing utf-8 string

    * parse.y (parser_tokadd_codepoint): move error checks and add char.

  Modified files:
    trunk/parse.y

Index: parse.y
===================================================================
--- parse.y (revision 56963)
+++ parse.y (revision 56964)
@@ -5759,22 +5759,31 @@ parser_tok_hex(struct parser_params *par https://github.com/ruby/ruby/blob/trunk/parse.y#L5759
 
 static int
 parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
-                        int string_literal, int regexp_literal,
-                        int codepoint, int numlen)
+                        int regexp_literal, int wide)
 {
+    size_t numlen;
+    int codepoint = scan_hex(lex_p, wide ? 6 : 4, &numlen);
+    if (wide ? (numlen == 0) : (numlen < 4)) {
+        yyerror("invalid Unicode escape");
+        return FALSE;
+    }
+    if (codepoint > 0x10ffff) {
+        yyerror("invalid Unicode codepoint (too large)");
+        return FALSE;
+    }
     if ((codepoint & 0xfffff800) == 0xd800) {
         yyerror("invalid Unicode codepoint");
         return FALSE;
     }
     lex_p += numlen;
     if (regexp_literal) {
-        tokcopy(numlen);
+        tokcopy((int)numlen);
     }
     else if (codepoint >= 0x80) {
         *encp = rb_utf8_encoding();
-        if (string_literal) tokaddmbc(codepoint, *encp);
+        tokaddmbc(codepoint, *encp);
     }
-    else if (string_literal) {
+    else {
         tokadd(codepoint);
     }
     return TRUE;
@@ -5783,7 +5792,7 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5792
 /* return value is for ?\u3042 */
 static int
 parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
-                   int string_literal, int symbol_literal, int regexp_literal)
+		   int string_literal, int symbol_literal, int regexp_literal)
 {
     /*
      * If string_literal is true, then we allow multiple codepoints
@@ -5792,8 +5801,6 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5801
      * codepoint without adding it
      */
 
-    int codepoint;
-    size_t numlen;
     const int open_brace = '{', close_brace = '}';
 
     if (regexp_literal) { tokadd('\\'); tokadd('u'); }
@@ -5804,18 +5811,7 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5811
         pushback(c);
         do {
             if (regexp_literal) tokadd(last);
-            codepoint = scan_hex(lex_p, 6, &numlen);
-            if (numlen == 0) {
-                yyerror("invalid Unicode escape");
-                return 0;
-            }
-            if (codepoint > 0x10ffff) {
-                yyerror("invalid Unicode codepoint (too large)");
-                return 0;
-            }
-            if (!parser_tokadd_codepoint(parser, encp,
-                                         string_literal, regexp_literal,
-                                         codepoint, (int)numlen)) {
+            if (!parser_tokadd_codepoint(parser, encp, regexp_literal, TRUE)) {
                 return 0;
             }
             while (ISSPACE(c = nextc())) last = c;
@@ -5831,19 +5827,12 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5827
         nextc();
     }
     else { /* handle \uxxxx form */
-        codepoint = scan_hex(lex_p, 4, &numlen);
-        if (numlen < 4) {
-            yyerror("invalid Unicode escape");
-            return 0;
-        }
-        if (!parser_tokadd_codepoint(parser, encp,
-                                     string_literal, regexp_literal,
-                                     codepoint, 4)) {
+        if (!parser_tokadd_codepoint(parser, encp, regexp_literal, FALSE)) {
             return 0;
         }
     }
 
-    return codepoint;
+    return TRUE;
 }
 
 #define ESCAPE_CONTROL 1
@@ -6189,7 +6178,7 @@ parser_tokadd_string(struct parser_param https://github.com/ruby/ruby/blob/trunk/parse.y#L6178
                 }
                 parser_tokadd_utf8(parser, &enc, 1,
                                    func & STR_FUNC_SYMBOL,
-                                   func & STR_FUNC_REGEXP);
+				   func & STR_FUNC_REGEXP);
                 if (has_nonascii && enc != *encp) {
                     mixed_escape(beg, enc, *encp);
                 }
@@ -7556,13 +7545,8 @@ parse_qmark(struct parser_params *parser https://github.com/ruby/ruby/blob/trunk/parse.y#L7545
     else if (c == '\\') {
         if (peek('u')) {
             nextc();
-            c = parser_tokadd_utf8(parser, &enc, 0, 0, 0);
-            if (0x80 <= c) {
-                tokaddmbc(c, enc);
-            }
-            else {
-                tokadd(c);
-            }
+            if (!parser_tokadd_utf8(parser, &enc, 0, 0, 0))
+                return 0;
         }
         else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {
             nextc();

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/