ruby-changes:6546
From: naruse <ko1@a...>
Date: Mon, 14 Jul 2008 15:27:41 +0900 (JST)
Subject: [ruby-changes:6546] Ruby:r18062 (trunk): * transcode.c (get_replacement_character): temporary function,
naruse  2008-07-14 15:27:26 +0900 (Mon, 14 Jul 2008)

  New Revision: 18062

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18062

  Log:
    * transcode.c (get_replacement_character): temporary function,
      get characters for replacement mode.

    * transcode.c (transcode_loop): add undef key and replace value.

    * transcode.c (str_transcode): ditto.

    * transcode.c (Init_transcode): define sym_undef and sym_replace.

  Modified files:
    trunk/ChangeLog
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog (revision 18061)
+++ ChangeLog (revision 18062)
@@ -1,3 +1,14 @@
+Mon Jul 14 15:18:30 2008 NARUSE, Yui <naruse@r...>
+
+        * transcode.c (get_replacement_character): temporary function,
+          get characters for replacement mode.
+
+        * transcode.c (transcode_loop): add undef key and replace value.
+
+        * transcode.c (str_transcode): ditto.
+
+        * transcode.c (Init_transcode): define sym_undef and sym_replace.
+
 Mon Jul 14 15:16:40 2008 Kazuhiro NISHIYAMA <zn@m...>
 
         * test/ruby/test_variable.rb (TestVariable#test_global_variable_0):
Index: transcode.c
===================================================================
--- transcode.c (revision 18061)
+++ transcode.c (revision 18062)
@@ -15,8 +15,11 @@
 #include "transcode_data.h"
 #include <ctype.h>
 
-static VALUE sym_invalid, sym_ignore;
+static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
 #define INVALID_IGNORE 0x1
+#define INVALID_REPLACE 0x2
+#define UNDEF_IGNORE 0x10
+#define UNDEF_REPLACE 0x20
 
 /*
  * Dispatch data and logic
@@ -119,6 +122,36 @@
     return (rb_transcoder *)val;
 }
 
+static const char*
+get_replacement_character(rb_encoding *enc)
+{
+    static rb_encoding *utf16be_encoding, *utf16le_encoding;
+    static rb_encoding *utf32be_encoding, *utf32le_encoding;
+    if (!utf16be_encoding) {
+        utf16be_encoding = rb_enc_find("UTF-16BE");
+        utf16le_encoding = rb_enc_find("UTF-16LE");
+        utf32be_encoding = rb_enc_find("UTF-32BE");
+        utf32le_encoding = rb_enc_find("UTF-32LE");
+    }
+    if (rb_enc_asciicompat(enc)) {
+        return "?";
+    }
+    else if (utf16be_encoding = enc) {
+        return "\x00?";
+    }
+    else if (utf16le_encoding = enc) {
+        return "?\x00";
+    }
+    else if (utf32be_encoding = enc) {
+        return "\x00\x00\x00?";
+    }
+    else if (utf32le_encoding = enc) {
+        return "?\x00\x00\x00";
+    }
+    else {
+        return "?";
+    }
+}
 
 /*
  * Transcoding engine logic
@@ -139,6 +172,7 @@
     unsigned char next_byte;
     int from_utf8 = my_transcoder->from_utf8;
     unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
+    rb_encoding *to_encoding = rb_enc_find(my_transcoder->to_encoding);
     while (in_p < in_stop) {
         char_start = in_p;
         next_table = conv_tree_start;
@@ -209,9 +243,7 @@
         case INVALID:
             goto invalid;
         case UNDEF:
-            /* todo: add code for alternate behaviors */
-            rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
-            continue;
+            goto undef;
         }
         continue;
       invalid:
@@ -220,8 +252,31 @@
         if (opt&INVALID_IGNORE) {
             continue;
         }
+        else if (opt&INVALID_REPLACE) {
+            const char *rep = get_replacement_character(to_encoding);
+            do {
+                *out_p++ = *rep++;
+            } while (*rep);
+            continue;
+        }
         rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
         continue;
+      undef:
+        /* valid character in from encoding
+         * but no related character(s) in to encoding */
+        /* todo: add more alternative behaviors */
+        if (opt&UNDEF_IGNORE) {
+            continue;
+        }
+        else if (opt&UNDEF_REPLACE) {
+            const char *rep = get_replacement_character(to_encoding);
+            do {
+                *out_p++ = *rep++;
+            } while (*rep);
+            continue;
+        }
+        rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
+        continue;
     }
     /* cleanup */
     *in_pos = in_p;
@@ -265,11 +320,29 @@
         argc--;
         v = rb_hash_aref(opt, sym_invalid);
         if (NIL_P(v)) {
-            rb_raise(rb_eArgError, "unknown value for invalid: setting");
         }
         else if (v==sym_ignore) {
             options |= INVALID_IGNORE;
         }
+        else if (v==sym_replace) {
+            options |= INVALID_REPLACE;
+            v = rb_hash_aref(opt, sym_replace);
+        }
+        else {
+            rb_raise(rb_eArgError, "unknown value for invalid: setting");
+        }
+        v = rb_hash_aref(opt, sym_undef);
+        if (NIL_P(v)) {
+        }
+        else if (v==sym_ignore) {
+            options |= UNDEF_IGNORE;
+        }
+        else if (v==sym_replace) {
+            options |= UNDEF_REPLACE;
+        }
+        else {
+            rb_raise(rb_eArgError, "unknown value for undef: setting");
+        }
     }
     if (argc < 1 || argc > 2) {
         rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
@@ -451,7 +524,9 @@
     transcoder_lib_table = st_init_strcasetable();
 
     sym_invalid = ID2SYM(rb_intern("invalid"));
+    sym_undef = ID2SYM(rb_intern("undef"));
     sym_ignore = ID2SYM(rb_intern("ignore"));
+    sym_replace = ID2SYM(rb_intern("replace"));
 
     rb_define_method(rb_cString, "encode", str_encode, -1);
     rb_define_method(rb_cString, "encode!", str_encode_bang, -1);

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/