[前][次][番号順一覧][スレッド一覧]

ruby-changes:49342

From: naruse <ko1@a...>
Date: Mon, 25 Dec 2017 10:35:07 +0900 (JST)
Subject: [ruby-changes:49342] naruse:r61379 (trunk): fix escapes in undump

naruse	2017-12-21 14:08:57 +0900 (Thu, 21 Dec 2017)

  New Revision: 61379

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=61379

  Log:
    fix escapes in undump

  Modified files:
    trunk/string.c
    trunk/test/ruby/test_string.rb
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb	(revision 61378)
+++ test/ruby/test_string.rb	(revision 61379)
@@ -756,13 +756,18 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L756
   def test_undump
     a = S("Test") << 1 << 2 << 3 << 9 << 13 << 10
     assert_equal(a, S('"Test\\x01\\x02\\x03\\t\\r\\n"').undump)
+    assert_equal(S("\\ca"), S('"\\ca"').undump)
     assert_equal(S("\u{7F}"), S('"\\x7F"').undump)
+    assert_equal(S("\u{7F}A"), S('"\\x7FA"').undump)
     assert_equal(S("\u{AB}"), S('"\\u00AB"').undump)
     assert_equal(S("\u{ABC}"), S('"\\u0ABC"').undump)
     assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
+    assert_equal(S("\uABCD"), S('"\\uABCD"').undump)
     assert_equal(S("\u{ABCDE}"), S('"\\u{ABCDE}"').undump)
     assert_equal(S("\u{10ABCD}"), S('"\\u{10ABCD}"').undump)
     assert_equal(S("\u{ABCDE 10ABCD}"), S('"\\u{ABCDE 10ABCD}"').undump)
+    assert_equal(S(""), S('"\\u{}"').undump)
+    assert_equal(S(""), S('"\\u{  }"').undump)
 
     assert_equal(S("äöü"), S('"\u00E4\u00F6\u00FC"').undump)
     assert_equal(S("äöü"), S('"\xC3\xA4\xC3\xB6\xC3\xBC"').undump)
@@ -783,8 +788,16 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L788
 
     assert_raise(RuntimeError) { S('"\u"').undump }
     assert_raise(RuntimeError) { S('"\u{"').undump }
+    assert_raise(RuntimeError) { S('"\u304"').undump }
+    assert_raise(RuntimeError) { S('"\u304Z"').undump }
+    assert_raise(RuntimeError) { S('"\udfff"').undump }
+    assert_raise(RuntimeError) { S('"\u{dfff}"').undump }
     assert_raise(RuntimeError) { S('"\u{3042"').undump }
+    assert_raise(RuntimeError) { S('"\u{3042 "').undump }
+    assert_raise(RuntimeError) { S('"\u{110000}"').undump }
+    assert_raise(RuntimeError) { S('"\u{1234567}"').undump }
     assert_raise(RuntimeError) { S('"\x"').undump }
+    assert_raise(RuntimeError) { S('"\xA"').undump }
     assert_raise(RuntimeError) { S('"\\"').undump }
     assert_raise(RuntimeError) { S(%("\0")).undump }
   end
Index: string.c
===================================================================
--- string.c	(revision 61378)
+++ string.c	(revision 61379)
@@ -6163,21 +6163,19 @@ unescape_ascii(unsigned int c) https://github.com/ruby/ruby/blob/trunk/string.c#L6163
 static long
 undump_after_backslash(VALUE undumped, const char *s, const char *s_end, rb_encoding **penc)
 {
-    unsigned int c, c2;
-    long n;
+    const char *s0 = s;
+    unsigned int c;
     int codelen;
     size_t hexlen;
     char buf[6];
     static rb_encoding *enc_utf8 = NULL;
 
-    c = rb_enc_codepoint_len(s, s_end, &codelen, *penc);
-    n = codelen;
-    switch (c) {
+    switch (*s) {
       case '\\':
       case '"':
       case '#':
-	rb_str_cat(undumped, s, n); /* cat itself */
-	n++;
+	rb_str_cat(undumped, s, 1); /* cat itself */
+	s++;
 	break;
       case 'n':
       case 'r':
@@ -6187,77 +6185,78 @@ undump_after_backslash(VALUE undumped, c https://github.com/ruby/ruby/blob/trunk/string.c#L6185
       case 'b':
       case 'a':
       case 'e':
-	*buf = (char)unescape_ascii(c);
-	rb_str_cat(undumped, buf, n);
-	n++;
+	*buf = (char)unescape_ascii(*s);
+	rb_str_cat(undumped, buf, 1);
+	s++;
 	break;
       case 'u':
-	if (s+1 >= s_end) {
+	if (++s >= s_end) {
 	    rb_raise(rb_eRuntimeError, "invalid Unicode escape");
 	}
 	if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
 	if (*penc != enc_utf8) {
 	    *penc = enc_utf8;
 	    rb_enc_associate(undumped, enc_utf8);
-	    ENC_CODERANGE_CLEAR(undumped);
 	}
-	c2 = rb_enc_codepoint_len(s+1, s_end, NULL, *penc);
-	if (c2 == '{') { /* handle \u{...} form */
-	    const char *hexstr = s + 2;
-	    int hex;
-
-	    while ((hex = rb_enc_ascget(hexstr, s_end, &codelen, *penc)) != '}') {
-		if (hex == -1) {
+	if (*s == '{') { /* handle \u{...} form */
+	    s++;
+	    for (;;) {
+		if (s >= s_end) {
 		    rb_raise(rb_eRuntimeError, "unterminated Unicode escape");
 		}
-		if (ISSPACE(hex)) {
-		    hexstr += codelen;
+		if (*s == '}') {
+		    s++;
+		    break;
+		}
+		if (ISSPACE(*s)) {
+		    s++;
 		    continue;
 		}
-		hex = scan_hex(hexstr, s_end-hexstr, &hexlen);
+		c = scan_hex(s, s_end-s, &hexlen);
 		if (hexlen == 0 || hexlen > 6) {
 		    rb_raise(rb_eRuntimeError, "invalid Unicode escape");
 		}
-		if (hex > 0x10ffff) {
+		if (c > 0x10ffff) {
 		    rb_raise(rb_eRuntimeError, "invalid Unicode codepoint (too large)");
 		}
-		if ((hex & 0xfffff800) == 0xd800) {
+		if (0xd800 <= c && c <= 0xdfff) {
 		    rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
 		}
-		codelen = rb_enc_mbcput(hex, buf, *penc);
+		codelen = rb_enc_mbcput(c, buf, *penc);
 		rb_str_cat(undumped, buf, codelen);
-		hexstr += hexlen;
+		s += hexlen;
 	    }
-	    n += hexstr - s + 1;
 	}
 	else { /* handle \uXXXX form */
-	    int hex = scan_hex(s+1, 4, &hexlen);
+	    c = scan_hex(s, 4, &hexlen);
 	    if (hexlen != 4) {
 		rb_raise(rb_eRuntimeError, "invalid Unicode escape");
 	    }
-	    codelen = rb_enc_codelen(hex, *penc);
-	    rb_enc_mbcput(hex, buf, *penc);
+	    if (0xd800 <= c && c <= 0xdfff) {
+		rb_raise(rb_eRuntimeError, "invalid Unicode codepoint");
+	    }
+	    codelen = rb_enc_mbcput(c, buf, *penc);
 	    rb_str_cat(undumped, buf, codelen);
-	    n += rb_strlen_lit("uXXXX");
+	    s += hexlen;
 	}
 	break;
       case 'x':
-	if (s+1 >= s_end) {
+	if (++s >= s_end) {
 	    rb_raise(rb_eRuntimeError, "invalid hex escape");
 	}
-	c2 = scan_hex(s+1, 2, &hexlen);
+	*buf = scan_hex(s, 2, &hexlen);
 	if (hexlen != 2) {
 	    rb_raise(rb_eRuntimeError, "invalid hex escape");
 	}
-	*buf = (char)c2;
-	rb_str_cat(undumped, buf, 1L);
-	n += rb_strlen_lit("xXX");
+	rb_str_cat(undumped, buf, 1);
+	s += hexlen;
 	break;
       default:
-	rb_str_cat(undumped, "\\", 1L); /* keep backslash */
+	rb_str_cat(undumped, s-1, 2);
+	s++;
     }
 
-    return n;
+    return s - s0 + 1;
 }
 
 static VALUE rb_str_is_ascii_only_p(VALUE str);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]