ruby-changes:28365
From: nobu <ko1@a...>
Date: Tue, 23 Apr 2013 11:59:02 +0900 (JST)
Subject: [ruby-changes:28365] nobu:r40417 (trunk): string.c: fix for UTF-32
nobu 2013-04-23 11:58:51 +0900 (Tue, 23 Apr 2013) New Revision: 40417 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=40417 Log: string.c: fix for UTF-32 * string.c (rb_str_scrub): fix for UTF-32. strlen() on strings containing NUL returns a wrong result; use the sizeof operator instead. [ruby-dev:45975] [Feature #6752] Modified files: trunk/ChangeLog trunk/string.c trunk/test/ruby/test_m17n.rb Index: ChangeLog =================================================================== --- ChangeLog (revision 40416) +++ ChangeLog (revision 40417) @@ -1,3 +1,9 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1 +Tue Apr 23 11:58:46 2013 Nobuyoshi Nakada <nobu@r...> + + * string.c (rb_str_scrub): fix for UTF-32. strlen() on strings + contain NUL returns wrong result, use sizeof operator instead. + [ruby-dev:45975] [Feature #6752] + Tue Apr 23 10:26:50 2013 Akinori MUSHA <knu@i...> * test/ruby/test_m17n.rb Index: string.c =================================================================== --- string.c (revision 40416) +++ string.c (revision 40417) @@ -7805,6 +7805,11 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7805 return rb_str_dup(str); } +#define DEFAULT_REPLACE_CHAR(str) do { \ + static const char replace[sizeof(str)-1] = str; \ + rep = replace; replen = (int)sizeof(replace); \ + } while (0) + if (rb_enc_asciicompat(enc)) { const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); @@ -7824,13 +7829,11 @@ rb_str_scrub(int argc, VALUE *argv, VALU https://github.com/ruby/ruby/blob/trunk/string.c#L7829 rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT); } else if (enc == rb_utf8_encoding()) { - rep = "\xEF\xBF\xBD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD"); rep7bit_p = FALSE; } else { - rep = "?"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("?"); rep7bit_p = TRUE; } cr = ENC_CODERANGE_7BIT; @@ -7938,24 +7941,19 @@ rb_str_scrub(int argc, VALUE *argv, VALU 
https://github.com/ruby/ruby/blob/trunk/string.c#L7941 replen = RSTRING_LEN(repl); } else if (enc == utf16be) { - rep = "\xFF\xFD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFF\xFD"); } else if (enc == utf16le) { - rep = "\xFD\xFF"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFD\xFF"); } else if (enc == utf32be) { - rep = "\x00\x00\xFF\xFD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD"); } else if (enc == utf32le) { - rep = "\xFD\xFF\x00\x00"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00"); } else { - rep = "?"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("?"); } while (p < e) { Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 40416) +++ test/ruby/test_m17n.rb (revision 40417) @@ -1522,5 +1522,11 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_m17n.rb#L1522 assert_equal("\uFFFD\u3042".encode("UTF-16LE"), "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE). scrub) + assert_equal("\uFFFD".encode("UTF-32BE"), + "\xff".force_encoding(Encoding::UTF_32BE). + scrub) + assert_equal("\uFFFD".encode("UTF-32LE"), + "\xff".force_encoding(Encoding::UTF_32LE). + scrub) end end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/