ruby-changes:49342
From: naruse <ko1@a...>
Date: Mon, 25 Dec 2017 10:35:07 +0900 (JST)
Subject: [ruby-changes:49342] naruse:r61379 (trunk): fix escapes in undump
naruse 2017-12-21 14:08:57 +0900 (Thu, 21 Dec 2017) New Revision: 61379 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=61379 Log: fix escapes in undump Modified files: trunk/string.c trunk/test/ruby/test_string.rb Index: test/ruby/test_string.rb =================================================================== --- test/ruby/test_string.rb (revision 61378) +++ test/ruby/test_string.rb (revision 61379) @@ -756,13 +756,18 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L756 def test_undump a = S("Test") << 1 << 2 << 3 << 9 << 13 << 10 assert_equal(a, S('"Test\\x01\\x02\\x03\\t\\r\\n"').undump) + assert_equal(S("\\ca"), S('"\\ca"').undump) assert_equal(S("\u{7F}"), S('"\\x7F"').undump) + assert_equal(S("\u{7F}A"), S('"\\x7FA"').undump) assert_equal(S("\u{AB}"), S('"\\u00AB"').undump) assert_equal(S("\u{ABC}"), S('"\\u0ABC"').undump) assert_equal(S("\uABCD"), S('"\\uABCD"').undump) + assert_equal(S("\uABCD"), S('"\\uABCD"').undump) assert_equal(S("\u{ABCDE}"), S('"\\u{ABCDE}"').undump) assert_equal(S("\u{10ABCD}"), S('"\\u{10ABCD}"').undump) assert_equal(S("\u{ABCDE 10ABCD}"), S('"\\u{ABCDE 10ABCD}"').undump) + assert_equal(S(""), S('"\\u{}"').undump) + assert_equal(S(""), S('"\\u{ }"').undump) assert_equal(S("äöü"), S('"\u00E4\u00F6\u00FC"').undump) assert_equal(S("äöü"), S('"\xC3\xA4\xC3\xB6\xC3\xBC"').undump) @@ -783,8 +788,16 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L788 assert_raise(RuntimeError) { S('"\u"').undump } assert_raise(RuntimeError) { S('"\u{"').undump } + assert_raise(RuntimeError) { S('"\u304"').undump } + assert_raise(RuntimeError) { S('"\u304Z"').undump } + assert_raise(RuntimeError) { S('"\udfff"').undump } + assert_raise(RuntimeError) { S('"\u{dfff}"').undump } assert_raise(RuntimeError) { S('"\u{3042"').undump } + assert_raise(RuntimeError) { S('"\u{3042 "').undump } + assert_raise(RuntimeError) { S('"\u{110000}"').undump } + assert_raise(RuntimeError) { 
S('"\u{1234567}"').undump } assert_raise(RuntimeError) { S('"\x"').undump } + assert_raise(RuntimeError) { S('"\xA"').undump } assert_raise(RuntimeError) { S('"\\"').undump } assert_raise(RuntimeError) { S(%("\0")).undump } end Index: string.c =================================================================== --- string.c (revision 61378) +++ string.c (revision 61379) @@ -6163,21 +6163,19 @@ unescape_ascii(unsigned int c) https://github.com/ruby/ruby/blob/trunk/string.c#L6163 static long undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_encoding **penc) { - unsigned int c, c2; - long n; + const char *s0 = s; + unsigned int c; int codelen; size_t hexlen; char buf[6]; static rb_encoding *enc_utf8 = NULL; - c = rb_enc_codepoint_len(s, s_end, &codelen, *penc); - n = codelen; - switch (c) { + switch (*s) { case '\\': case '"': case '#': - rb_str_cat(undumped, s, n); /* cat itself */ - n++; + rb_str_cat(undumped, s, 1); /* cat itself */ + s++; break; case 'n': case 'r': @@ -6187,77 +6185,78 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6185 case 'b': case 'a': case 'e': - *buf = (char)unescape_ascii(c); - rb_str_cat(undumped, buf, n); - n++; + *buf = (char)unescape_ascii(*s); + rb_str_cat(undumped, buf, 1); + s++; break; case 'u': - if (s+1 >= s_end) { + if (++s >= s_end) { rb_raise(rb_eRuntimeError, "invalid Unicode escape"); } if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding(); if (*penc != enc_utf8) { *penc = enc_utf8; rb_enc_associate(undumped, enc_utf8); - ENC_CODERANGE_CLEAR(undumped); } - c2 = rb_enc_codepoint_len(s+1, s_end, NULL, *penc); - if (c2 == '{') { /* handle \u{...} form */ - const char *hexstr = s + 2; - int hex; - - while ((hex = rb_enc_ascget(hexstr, s_end, &codelen, *penc)) != '}') { - if (hex == -1) { + if (*s == '{') { /* handle \u{...} form */ + s++; + for (;;) { + if (s >= s_end) { rb_raise(rb_eRuntimeError, "unterminated Unicode escape"); } - if (ISSPACE(hex)) { - hexstr 
+= codelen; + if (*s == '}') { + s++; + break; + } + if (ISSPACE(*s)) { + s++; continue; } - hex = scan_hex(hexstr, s_end-hexstr, &hexlen); + c = scan_hex(s, s_end-s, &hexlen); if (hexlen == 0 || hexlen > 6) { rb_raise(rb_eRuntimeError, "invalid Unicode escape"); } - if (hex > 0x10ffff) { + if (c > 0x10ffff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint (too large)"); } - if ((hex & 0xfffff800) == 0xd800) { + if (0xd800 <= c && c <= 0xdfff) { rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); } - codelen = rb_enc_mbcput(hex, buf, *penc); + codelen = rb_enc_mbcput(c, buf, *penc); rb_str_cat(undumped, buf, codelen); - hexstr += hexlen; + s += hexlen; } - n += hexstr - s + 1; } else { /* handle \uXXXX form */ - int hex = scan_hex(s+1, 4, &hexlen); + c = scan_hex(s, 4, &hexlen); if (hexlen != 4) { rb_raise(rb_eRuntimeError, "invalid Unicode escape"); } - codelen = rb_enc_codelen(hex, *penc); - rb_enc_mbcput(hex, buf, *penc); + if (0xd800 <= c && c <= 0xdfff) { + rb_raise(rb_eRuntimeError, "invalid Unicode codepoint"); + } + codelen = rb_enc_mbcput(c, buf, *penc); rb_str_cat(undumped, buf, codelen); - n += rb_strlen_lit("uXXXX"); + s += hexlen; } break; case 'x': - if (s+1 >= s_end) { + if (++s >= s_end) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } - c2 = scan_hex(s+1, 2, &hexlen); + *buf = scan_hex(s, 2, &hexlen); if (hexlen != 2) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } - *buf = (char)c2; - rb_str_cat(undumped, buf, 1L); - n += rb_strlen_lit("xXX"); + rb_str_cat(undumped, buf, 1); + s += hexlen; break; default: - rb_str_cat(undumped, "\\", 1L); /* keep backslash */ + rb_str_cat(undumped, s-1, 2); + s++; } - return n; + return s - s0 + 1; } static VALUE rb_str_is_ascii_only_p(VALUE str); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/