ruby-changes:4075
From: ko1@a...
Date: Thu, 21 Feb 2008 17:42:31 +0900 (JST)
Subject: [ruby-changes:4075] duerst - Ruby:r15565 (trunk): Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@i...>
duerst 2008-02-21 17:42:10 +0900 (Thu, 21 Feb 2008) New Revision: 15565 Modified files: trunk/ChangeLog trunk/test/ruby/test_transcode.rb trunk/transcode.c Log: Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@i...> * transcode.c: Added basic support for passing options to String#encode via a hash. Currently only one option, with one value, is supported: invalid: :ignore (dropping invalid byte sequences instead of producing an error). Option naming is not yet stable! * test/ruby/test_transcode.rb: Added a single test for invalid: :ignore option. Not more tests because most data does not yet distinguish between INVALID and UNKNOWN. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_transcode.rb?r1=15565&r2=15564&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15565&r2=15564&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/transcode.c?r1=15565&r2=15564&diff_format=u Index: ChangeLog =================================================================== --- ChangeLog (revision 15564) +++ ChangeLog (revision 15565) @@ -1,3 +1,14 @@ +Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@i...> + + * transcode.c: Added basic support for passing options to String#encode + via a hash. Currently only one option, with one value, is supported: + invalid: :ignore (dropping invalid byte sequences instead of + producing an error). Option naming is not yet stable! + + * test/ruby/test_transcode.rb: Added a single test for invalid: :ignore + option. Not more tests because most data does not yet distinguish + between INVALID and UNKNOWN. + Thu Feb 21 16:35:26 2008 Nobuyoshi Nakada <nobu@r...> * array.c (rb_ary_unshift_m): expands enough for argc. [ruby-dev:33880] Index: test/ruby/test_transcode.rb =================================================================== --- test/ruby/test_transcode.rb (revision 15564) +++ test/ruby/test_transcode.rb (revision 15565) @@ -19,8 +19,6 @@ end def test_errors - # we don't have semantics for conversion without attribute yet - # maybe 'convert to UTF-8' would be nice :-) assert_raise(ArgumentError) { 'abc'.encode } assert_raise(ArgumentError) { 'abc'.encode! } assert_raise(ArgumentError) { 'abc'.encode('foo', 'bar') } @@ -241,4 +239,9 @@ check_utf_32_both_ways("\u{8FF00}", "\x00\x08\xFF\x00") check_utf_32_both_ways("\u{F00FF}", "\x00\x0F\x00\xFF") end + + def test_invalid_ignore + # arguments only + 'abc'.encode('utf-8', invalid: :ignore) + end end Index: transcode.c =================================================================== --- transcode.c (revision 15564) +++ transcode.c (revision 15565) @@ -15,6 +15,9 @@ #include "transcode_data.h" #include <ctype.h> +static VALUE sym_invalid, sym_ignore; +#define INVALID_IGNORE 0x1 + /* * Dispatch data and logic */ @@ -132,7 +135,8 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos, unsigned char *in_stop, unsigned char *out_stop, const rb_transcoder *my_transcoder, - rb_transcoding *my_transcoding) + rb_transcoding *my_transcoding, + const int opt) { unsigned char *in_p = *in_pos, *out_p = *out_pos; const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start; @@ -211,14 +215,17 @@ case INVALID: goto invalid; case UNDEF: - /* todo: add code for alternative behaviors */ + /* todo: add code for alternate behaviors */ rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)"); continue; } continue; invalid: /* deal with invalid byte sequence */ - /* todo: add code for alternative behaviors */ + /* todo: add more alternative behaviors */ + if (opt&INVALID_IGNORE) { + continue; + } rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence"); continue; } @@ -254,7 +261,22 @@ const rb_transcoder *my_transcoder; rb_transcoding my_transcoding; int final_encoding = 0; + VALUE opt; + int options = 0; + opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash"); + if (!NIL_P(opt)) { + VALUE v; + + argc--; + v = rb_hash_aref(opt, sym_invalid); + if (NIL_P(v)) { + rb_raise(rb_eArgError, "unknown value for invalid: setting"); + } + else if (v==sym_ignore) { + options |= INVALID_IGNORE; + } + } if (argc < 1 || argc > 2) { rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc); } @@ -325,7 +347,7 @@ my_transcoding.ruby_string_dest = dest; my_transcoding.flush_func = str_transcoding_resize; - transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding); + transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options); if (fromp != sp+slen) { rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp); } @@ -426,6 +448,9 @@ transcoder_lib_table = st_init_strcasetable(); init_transcoder_table(); + sym_invalid = ID2SYM(rb_intern("invalid")); + sym_ignore = ID2SYM(rb_intern("ignore")); + rb_define_method(rb_cString, "encode", rb_str_transcode, -1); rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1); } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/