ruby-changes:6784
From: naruse <ko1@a...>
Date: Fri, 1 Aug 2008 05:35:54 +0900 (JST)
Subject: [ruby-changes:6784] Ruby:r18300 (trunk): * transcode.c (output_replacement_character):
naruse 2008-08-01 05:35:35 +0900 (Fri, 01 Aug 2008)

  New Revision: 18300

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18300

  Log:
    * transcode.c (output_replacement_character):
      rename from _get_replacement_character.

    * transcode.c (output_replacement_character):
      fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]

    * transcode.c (transcode_loop): ditto.

    * test/ruby/test_transcode.rb (test_invalid_replace):
      add for above.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_transcode.rb
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog (revision 18299)
+++ ChangeLog (revision 18300)
@@ -1,3 +1,16 @@
+Fri Aug 1 05:31:08 2008 NARUSE, Yui <naruse@r...>
+
+        * transcode.c (output_replacement_character):
+          rename from _get_replacement_character.
+
+        * transcode.c (output_replacement_character):
+          fix replacement on UTF-32{BE,LE}. [ruby-dev:35705]
+
+        * transcode.c (transcode_loop): ditto.
+
+        * test/ruby/test_transcode.rb (test_invalid_replace):
+          add for above.
+
 Fri Aug 1 01:01:49 2008 Yusuke Endoh <mame@t...>
 
         * proc.c (rb_proc_call_with_block): reduce comparison.
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb (revision 18299)
+++ test/ruby/test_transcode.rb (revision 18300)
@@ -254,6 +254,21 @@
       "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
   end
 
+  def test_invalid_replace
+    # arguments only
+    assert_nothing_raised { 'abc'.encode('UTF-8', invalid: :replace) }
+    assert_equal("\xEF\xBF\xBD".force_encoding("UTF-8"),
+      "\x80".encode("UTF-8", "UTF-16BE", invalid: :replace))
+    assert_equal("\xFF\xFD".force_encoding("UTF-16BE"),
+      "\x80".encode("UTF-16BE", "UTF-8", invalid: :replace))
+    assert_equal("\xFD\xFF".force_encoding("UTF-16LE"),
+      "\x80".encode("UTF-16LE", "UTF-8", invalid: :replace))
+    assert_equal("\x00\x00\xFF\xFD".force_encoding("UTF-32BE"),
+      "\x80".encode("UTF-32BE", "UTF-8", invalid: :replace))
+    assert_equal("\xFD\xFF\x00\x00".force_encoding("UTF-32LE"),
+      "\x80".encode("UTF-32LE", "UTF-8", invalid: :replace))
+  end
+
   def test_shift_jis
     check_both_ways("\u3000", "\x81\x40", 'shift_jis') # full-width space
     check_both_ways("\u00D7", "\x81\x7E", 'shift_jis') # ~
Index: transcode.c
===================================================================
--- transcode.c (revision 18299)
+++ transcode.c (revision 18300)
@@ -91,7 +91,7 @@
 #define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
 
 static const rb_transcoder *
-transcode_dispatch(const char* from_encoding, const char* to_encoding)
+transcode_dispatch(const char *from_encoding, const char *to_encoding)
 {
     char *const key = transcoder_key(from_encoding, to_encoding);
     st_data_t k, val = 0;
@@ -122,9 +122,10 @@
     return (rb_transcoder *)val;
 }
 
-static const char*
-get_replacement_character(rb_encoding *enc)
+static void
+output_replacement_character(unsigned char **out_pp, rb_encoding *enc)
 {
+    unsigned char *out_p = *out_pp;
     static rb_encoding *utf16be_encoding, *utf16le_encoding;
     static rb_encoding *utf32be_encoding, *utf32le_encoding;
     if (!utf16be_encoding) {
@@ -133,24 +134,36 @@
         utf32be_encoding = rb_enc_find("UTF-32BE");
         utf32le_encoding = rb_enc_find("UTF-32LE");
     }
-    if (rb_enc_asciicompat(enc)) {
-        return "?";
+    if (rb_utf8_encoding() == enc) {
+        *out_p++ = 0xEF;
+        *out_p++ = 0xBF;
+        *out_p++ = 0xBD;
     }
     else if (utf16be_encoding == enc) {
-        return "\xFF\xFD";
+        *out_p++ = 0xFF;
+        *out_p++ = 0xFD;
     }
     else if (utf16le_encoding == enc) {
-        return "\xFD\xFF";
+        *out_p++ = 0xFD;
+        *out_p++ = 0xFF;
     }
     else if (utf32be_encoding == enc) {
-        return "\x00\x00\xFF\xFD";
+        *out_p++ = 0x00;
+        *out_p++ = 0x00;
+        *out_p++ = 0xFF;
+        *out_p++ = 0xFD;
     }
     else if (utf32le_encoding == enc) {
-        return "\xFD\xFF\x00\x00";
+        *out_p++ = 0xFD;
+        *out_p++ = 0xFF;
+        *out_p++ = 0x00;
+        *out_p++ = 0x00;
     }
     else {
-        return "?";
+        *out_p++ = '?';
     }
+    *out_pp = out_p;
+    return;
 }
 
 /*
@@ -255,10 +268,7 @@
                 continue;
             }
             else if (opt&INVALID_REPLACE) {
-                const char *rep = get_replacement_character(to_encoding);
-                do {
-                    *out_p++ = *rep++;
-                } while (*rep);
+                output_replacement_character(&out_p, to_encoding);
                 continue;
             }
             rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
@@ -271,10 +281,7 @@
                 continue;
             }
             else if (opt&UNDEF_REPLACE) {
-                const char *rep = get_replacement_character(to_encoding);
-                do {
-                    *out_p++ = *rep++;
-                } while (*rep);
+                output_replacement_character(&out_p, to_encoding);
                 continue;
             }
             rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");

-- 
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/