ruby-changes:7006
From: akr <ko1@a...>
Date: Tue, 12 Aug 2008 16:20:38 +0900 (JST)
Subject: [ruby-changes:7006] Ruby:r18524 (trunk): * transcode_data.h (TRANSCODE_ERROR): removed.
akr 2008-08-12 16:20:10 +0900 (Tue, 12 Aug 2008) New Revision: 18524 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18524 Log: * transcode_data.h (TRANSCODE_ERROR): removed. * tool/transcode-tblgen.rb: 8bit byte of ASCII-8BIT is a valid (but unique to ASCII-8BIT) character. * transcode.c (rb_eConversionUndefined): new error. (rb_eInvalidByteSequence): ditto. Modified files: trunk/ChangeLog trunk/test/ruby/test_transcode.rb trunk/tool/transcode-tblgen.rb trunk/transcode.c trunk/transcode_data.h Index: ChangeLog =================================================================== --- ChangeLog (revision 18523) +++ ChangeLog (revision 18524) @@ -1,3 +1,13 @@ +Tue Aug 12 16:13:45 2008 Tanaka Akira <akr@f...> + + * transcode_data.h (TRANSCODE_ERROR): removed. + + * tool/transcode-tblgen.rb: 8bit byte of ASCII-8BIT is a valid + (but unique to ASCII-8BIT) character. + + * transcode.c (rb_eConversionUndefined): new error. + (rb_eInvalidByteSequence): ditto. + Tue Aug 12 16:08:51 2008 Minero Aoki <aamine@l...> * lib/net/pop.rb: POP3Command should export @socket since POP Index: transcode_data.h =================================================================== --- transcode_data.h (revision 18523) +++ transcode_data.h (revision 18524) @@ -125,6 +125,4 @@ void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); void rb_register_transcoder(const rb_transcoder *); -#define TRANSCODE_ERROR rb_eRuntimeError /*change exception*/ - #endif /* RUBY_TRANSCODE_DATA_H */ Index: tool/transcode-tblgen.rb =================================================================== --- tool/transcode-tblgen.rb (revision 18523) +++ tool/transcode-tblgen.rb (revision 18524) @@ -317,7 +317,8 @@ min = max = 0 end - if n = OffsetsMemo[offsets] + offsets_key = [min, max, offsets[min..max]] + if n = OffsetsMemo[offsets_key] offsets_name = n offsets_code = '' else @@ -326,7 +327,7 @@ static const unsigned char #{offsets_name}[#{2+max-min+1}] = 
#{format_offsets(min,max,offsets)}; End - OffsetsMemo[offsets] = offsets_name + OffsetsMemo[offsets_key] = offsets_name end if n = InfosMemo[infos] @@ -524,6 +525,7 @@ } { + 'ASCII-8BIT' => '1byte', 'ISO-8859-1' => '1byte', 'ISO-8859-2' => '1byte', 'ISO-8859-3' => '1byte', Index: test/ruby/test_transcode.rb =================================================================== --- test/ruby/test_transcode.rb (revision 18523) +++ test/ruby/test_transcode.rb (revision 18524) @@ -25,9 +25,9 @@ assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') } assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') } assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode!('foo') } - assert_raise(RuntimeError) { "\x80".encode('utf-8','ASCII-8BIT') } - assert_raise(RuntimeError) { "\x80".encode('utf-8','US-ASCII') } - assert_raise(RuntimeError) { "\xA5".encode('utf-8','iso-8859-3') } + assert_raise(Encoding::ConversionUndefined) { "\x80".encode('utf-8','ASCII-8BIT') } + assert_raise(Encoding::InvalidByteSequence) { "\x80".encode('utf-8','US-ASCII') } + assert_raise(Encoding::ConversionUndefined) { "\xA5".encode('utf-8','iso-8859-3') } end def test_arguments @@ -342,40 +342,39 @@ check_both_ways("\u71FC", "\xE0\x9E", 'shift_jis') # check_both_ways("\u71F9", "\xE0\x9F", 'shift_jis') # check_both_ways("\u73F1", "\xE0\xFC", 'shift_jis') # - assert_raise(RuntimeError) { "\xEF\x40".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xEF\x7E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xEF\x80".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xEF\x9E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xEF\x9F".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xEF\xFC".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xF0\x40".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xF0\x7E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { 
"\xF0\x80".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xF0\x9E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xF0\x9F".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xF0\xFC".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\x40".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\x7E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\x80".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\x9E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\x9F".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xEF\xFC".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\x40".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\x7E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\x80".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\x9E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\x9F".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xF0\xFC".encode("utf-8", 'shift_jis') } check_both_ways("\u9ADC", "\xFC\x40", 'shift_jis') # @ - assert_raise(RuntimeError) { "\xFC\x7E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xFC\x80".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xFC\x9E".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xFC\x9F".encode("utf-8", 'shift_jis') } - assert_raise(RuntimeError) { "\xFC\xFC".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xFC\x7E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xFC\x80".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { 
"\xFC\x9E".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xFC\x9F".encode("utf-8", 'shift_jis') } + assert_raise(Encoding::ConversionUndefined) { "\xFC\xFC".encode("utf-8", 'shift_jis') } check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘 check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\x90\xC2\x8E\x52\x8A\x77\x89\x40\x91\xE5\x8A\x77", 'shift_jis') # 青山学院大学 check_both_ways("\u795E\u6797\u7FA9\u535A", "\x90\x5F\x97\xD1\x8B\x60\x94\x8E", 'shift_jis') # 神林義博 end def test_iso_2022_jp - assert_raise(RuntimeError) { "\x1b(A".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\x1b$(A".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\x1b$C".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\x0e".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\x80".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") } - assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") } - assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") } - assert_raise(RuntimeError) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") } - assert_raise(RuntimeError) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x1b(A".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x1b$(A".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x1b$C".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x0e".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x80".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") } + assert_raise(Encoding::ConversionUndefined) { "\u9299".encode("iso-2022-jp") } + assert_raise(Encoding::ConversionUndefined) { 
"\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") } + assert_raise(Encoding::InvalidByteSequence) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") } assert_equal("\xA1\xA1".force_encoding("euc-jp"), "\e$B!!\e(B".encode("EUC-JP", "ISO-2022-JP")) assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"), Index: transcode.c =================================================================== --- transcode.c (revision 18523) +++ transcode.c (revision 18524) @@ -15,6 +15,9 @@ #include "transcode_data.h" #include <ctype.h> +VALUE rb_eConversionUndefined; +VALUE rb_eInvalidByteSequence; + static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace; #define INVALID_IGNORE 0x1 #define INVALID_REPLACE 0x2 @@ -895,7 +898,7 @@ goto resume; } rb_trans_close(ts); - rb_raise(TRANSCODE_ERROR, "invalid byte sequence"); + rb_raise(rb_eInvalidByteSequence, "invalid byte sequence"); } if (ret == transcode_undefined_conversion) { /* valid character in from encoding @@ -909,7 +912,7 @@ goto resume; } rb_trans_close(ts); - rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)"); + rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)"); } if (ret == transcode_obuf_full) { more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop); @@ -974,7 +977,7 @@ break; } rb_trans_close(ts); - rb_raise(TRANSCODE_ERROR, "invalid byte sequence"); + rb_raise(rb_eInvalidByteSequence, "invalid byte sequence"); break; case transcode_undefined_conversion: @@ -989,7 +992,7 @@ break; } rb_trans_close(ts); - rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)"); + rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)"); break; case transcode_obuf_full: @@ -1204,6 +1207,9 @@ void Init_transcode(void) { + rb_eConversionUndefined = rb_define_class_under(rb_cEncoding, "ConversionUndefined", 
rb_eStandardError); + rb_eInvalidByteSequence = rb_define_class_under(rb_cEncoding, "InvalidByteSequence", rb_eStandardError); + transcoder_table = st_init_strcasetable(); sym_invalid = ID2SYM(rb_intern("invalid")); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/