ruby-changes:44883
From: nobu <ko1@a...>
Date: Thu, 1 Dec 2016 17:26:44 +0900 (JST)
Subject: [ruby-changes:44883] nobu:r56956 (trunk): parse.y: reject invalid codepoint
nobu 2016-12-01 17:26:39 +0900 (Thu, 01 Dec 2016)

  New Revision: 56956

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=56956

  Log:
    parse.y: reject invalid codepoint

    * parse.y (parser_tokadd_codepoint): reject invalid codepoint,
      surrogate blocks and surrogate pair, as well as mruby.

  Modified files:
    trunk/parse.y
    trunk/test/ruby/test_unicode_escape.rb

Index: test/ruby/test_unicode_escape.rb
===================================================================
--- test/ruby/test_unicode_escape.rb (revision 56955)
+++ test/ruby/test_unicode_escape.rb (revision 56956)
@@ -264,12 +264,9 @@ EOS https://github.com/ruby/ruby/blob/trunk/test/ruby/test_unicode_escape.rb#L264
 
     assert_raise(SyntaxError) { eval %q("\u{ 123 456}")} # extra space
     assert_raise(SyntaxError) { eval %q("\u{123 456}")} # extra space
-# The utf-8 encoding object currently does not object to codepoints
-# in the surrogate blocks, so these do not raise an error.
-# assert_raise(SyntaxError) { "\uD800" } # surrogate block
-# assert_raise(SyntaxError) { "\uDCBA" } # surrogate block
-# assert_raise(SyntaxError) { "\uDFFF" } # surrogate block
-# assert_raise(SyntaxError) { "\uD847\uDD9A" } # surrogate pair
-
+    assert_raise(SyntaxError) { eval %q("\uD800") } # surrogate block
+    assert_raise(SyntaxError) { eval %q("\uDCBA") } # surrogate block
+    assert_raise(SyntaxError) { eval %q("\uDFFF") } # surrogate block
+    assert_raise(SyntaxError) { eval %q("\uD847\uDD9A") } # surrogate pair
   end
 end
Index: parse.y
===================================================================
--- parse.y (revision 56955)
+++ parse.y (revision 56956)
@@ -5757,11 +5757,15 @@ parser_tok_hex(struct parser_params *par https://github.com/ruby/ruby/blob/trunk/parse.y#L5757
 
 #define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
 
-static void
+static int
 parser_tokadd_codepoint(struct parser_params *parser, rb_encoding **encp,
                         int string_literal, int regexp_literal,
                         int codepoint, int numlen)
 {
+    if ((codepoint & 0xfffff800) == 0xd800) {
+        yyerror("invalid Unicode codepoint");
+        return FALSE;
+    }
     lex_p += numlen;
     if (regexp_literal) {
         tokcopy(numlen);
@@ -5773,6 +5777,7 @@ parser_tokadd_codepoint(struct parser_pa https://github.com/ruby/ruby/blob/trunk/parse.y#L5777
     else if (string_literal) {
         tokadd(codepoint);
     }
+    return TRUE;
 }
 
 /* return value is for ?\u3042 */
@@ -5806,8 +5811,11 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5811
                     yyerror("invalid Unicode codepoint (too large)");
                     return 0;
                 }
-                parser_tokadd_codepoint(parser, encp,string_literal, regexp_literal,
-                                        codepoint, (int)numlen);
+                if (!parser_tokadd_codepoint(parser, encp,
+                                             string_literal, regexp_literal,
+                                             codepoint, (int)numlen)) {
+                    return 0;
+                }
                 if (ISSPACE(c = nextc())) last = c;
             } while (string_literal && c != close_brace);
 
@@ -5824,8 +5832,11 @@ parser_tokadd_utf8(struct parser_params https://github.com/ruby/ruby/blob/trunk/parse.y#L5832
             yyerror("invalid Unicode escape");
             return 0;
         }
-        parser_tokadd_codepoint(parser, encp, string_literal, regexp_literal,
-                                codepoint, 4);
+        if (!parser_tokadd_codepoint(parser, encp,
+                                     string_literal, regexp_literal,
+                                     codepoint, 4)) {
+            return 0;
+        }
     }
 
     return codepoint;

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/