ruby-changes:43655
From: duerst <ko1@a...>
Date: Fri, 22 Jul 2016 17:13:42 +0900 (JST)
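Subject: [ruby-changes:43655] duerst:r55728 (trunk): * string.c (String#dump): Change escaping of non-ASCII characters in UTF-8 to use upper-case four-digit hexadecimal escapes without braces where possible [Feature #12419].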
duerst 2016-07-22 17:13:38 +0900 (Fri, 22 Jul 2016)

  New Revision: 55728

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=55728

  Log:
    * string.c (String#dump): Change escaping of non-ASCII characters in
      UTF-8 to use upper-case four-digit hexadecimal escapes without braces
      where possible [Feature #12419].

    * test/ruby/test_string.rb (test_dump): Add tests for above.

  Modified files:
    trunk/ChangeLog
    trunk/string.c
    trunk/test/ruby/test_string.rb

Index: string.c
===================================================================
--- string.c (revision 55727)
+++ string.c (revision 55728)
@@ -5656,12 +5656,16 @@ rb_str_dump(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5656
                 len++;
             }
             else {
-                if (u8 && c > 0x7F) { /* \u{NN} */
+                if (u8 && c > 0x7F) { /* \u notation */
                     int n = rb_enc_precise_mbclen(p-1, pend, enc);
                     if (MBCLEN_CHARFOUND_P(n)) {
                         unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
-                        while (cc >>= 4) len++;
-                        len += 5;
+                        if (cc <= 0xFFFF)
+                            len += 6; /* \uXXXX */
+                        else if (cc <= 0xFFFFF)
+                            len += 9; /* \u{XXXXX} */
+                        else
+                            len += 10; /* \u{XXXXXX} */
                         p += MBCLEN_CHARFOUND_LEN(n)-1;
                         break;
                     }
@@ -5734,7 +5738,10 @@ rb_str_dump(VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L5738
                 if (MBCLEN_CHARFOUND_P(n)) {
                     int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
                     p += n;
-                    snprintf(q, qend-q, "u{%x}", cc);
+                    if (cc <= 0xFFFF)
+                        snprintf(q, qend-q, "u%04X", cc); /* \uXXXX */
+                    else
+                        snprintf(q, qend-q, "u{%X}", cc); /* \u{XXXXX} or \u{XXXXXX} */
                     q += strlen(q);
                     continue;
                 }
Index: test/ruby/test_string.rb
===================================================================
--- test/ruby/test_string.rb (revision 55727)
+++ test/ruby/test_string.rb (revision 55728)
@@ -614,6 +614,18 @@ CODE https://github.com/ruby/ruby/blob/trunk/test/ruby/test_string.rb#L614
   def test_dump
     a= S("Test") << 1 << 2 << 3 << 9 << 13 << 10
     assert_equal(S('"Test\\x01\\x02\\x03\\t\\r\\n"'), a.dump)
+    b= S("\u{7F}")
+    assert_equal(S('"\\x7F"'), b.dump)
+    b= S("\u{AB}")
+    assert_equal(S('"\\u00AB"'), b.dump)
+    b= S("\u{ABC}")
+    assert_equal(S('"\\u0ABC"'), b.dump)
+    b= S("\uABCD")
+    assert_equal(S('"\\uABCD"'), b.dump)
+    b= S("\u{ABCDE}")
+    assert_equal(S('"\\u{ABCDE}"'), b.dump)
+    b= S("\u{10ABCD}")
+    assert_equal(S('"\\u{10ABCD}"'), b.dump)
   end
 
   def test_dup
Index: ChangeLog
===================================================================
--- ChangeLog (revision 55727)
+++ ChangeLog (revision 55728)
@@ -1,3 +1,11 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Fri Jul 22 17:13:37 2016  Martin Duerst  <duerst@i...>
+
+	* string.c (String#dump): Change escaping of non-ASCII characters in
+	  UTF-8 to use upper-case four-digit hexadecimal escapes without braces
+	  where possible [Feature #12419].
+
+	* test/ruby/test_string.rb (test_dump): Add tests for above.
+
 Fri Jul 22 10:35:35 2016  Kouhei Sutou  <kou@c...>
 
 	* lib/rexml/attribute.rb (REXML::Attribute#to_string): Fix wrong

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/