ruby-changes:7268
From: akr <ko1@a...>
Date: Sat, 23 Aug 2008 15:03:24 +0900 (JST)
Subject: [ruby-changes:7268] Ruby:r18787 (trunk): * include/ruby/encoding.h (ECONV_INVALID_MASK): defined.
akr 2008-08-23 15:02:58 +0900 (Sat, 23 Aug 2008) New Revision: 18787 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18787 Log: * include/ruby/encoding.h (ECONV_INVALID_MASK): defined. (ECONV_INVALID_IGNORE): defined. (ECONV_INVALID_REPLACE): defined. (ECONV_UNDEF_MASK): defined. (ECONV_UNDEF_IGNORE): defined. (ECONV_UNDEF_REPLACE): defined. * transcode.c (INVALID_IGNORE): removed. (INVALID_REPLACE): removed. (UNDEF_IGNORE): removed. (UNDEF_REPLACE): removed. (rb_econv_convert0): renamed from rb_econv_convert. (rb_econv_convert): defined to call rb_econv_convert0 with replace/ignore behavior moved from transcode_loop. (transcode_loop): replace/ignore behavior removed. Modified files: trunk/ChangeLog trunk/include/ruby/encoding.h trunk/test/ruby/test_econv.rb trunk/transcode.c Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 18786) +++ include/ruby/encoding.h (revision 18787) @@ -277,10 +277,18 @@ void rb_econv_binmode(rb_econv_t *ec); /* flags for rb_econv_open */ -#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x100 -#define ECONV_CRLF_NEWLINE_ENCODER 0x200 -#define ECONV_CR_NEWLINE_ENCODER 0x400 +#define ECONV_INVALID_MASK 0x000f +#define ECONV_INVALID_IGNORE 0x0001 +#define ECONV_INVALID_REPLACE 0x0002 +#define ECONV_UNDEF_MASK 0x00f0 +#define ECONV_UNDEF_IGNORE 0x0010 +#define ECONV_UNDEF_REPLACE 0x0020 + +#define ECONV_UNIVERSAL_NEWLINE_DECODER 0x0100 +#define ECONV_CRLF_NEWLINE_ENCODER 0x0200 +#define ECONV_CR_NEWLINE_ENCODER 0x0400 + /* flags for rb_econv_convert */ #define ECONV_PARTIAL_INPUT 0x10000 #define ECONV_OUTPUT_FOLLOWED_BY_INPUT 0x20000 Index: ChangeLog =================================================================== --- ChangeLog (revision 18786) +++ ChangeLog (revision 18787) @@ -1,3 +1,21 @@ +Sat Aug 23 14:59:32 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (ECONV_INVALID_MASK): defined. + (ECONV_INVALID_IGNORE): defined. + (ECONV_INVALID_REPLACE): defined. + (ECONV_UNDEF_MASK): defined. + (ECONV_UNDEF_IGNORE): defined. + (ECONV_UNDEF_REPLACE): defined. + + * transcode.c (INVALID_IGNORE): removed. + (INVALID_REPLACE): removed. + (UNDEF_IGNORE): removed. + (UNDEF_REPLACE): removed. + (rb_econv_convert0): renamed from rb_econv_convert. + (rb_econv_convert): defined to call rb_econv_convert0 with + replace/ignore behavior moved from transcode_loop. + (transcode_loop): replace/ignore behavior removed. + Sat Aug 23 11:23:05 2008 Tanaka Akira <akr@f...> * io.c (rb_io_extract_modeenc): check :textmode and :binmode in option Index: test/ruby/test_econv.rb =================================================================== --- test/ruby/test_econv.rb (revision 18786) +++ test/ruby/test_econv.rb (revision 18787) @@ -448,4 +448,37 @@ assert_equal(["abcdef", ""], [dst, src]) end + def test_invalid_replace + ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_REPLACE) + ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) + assert_equal(:finished, ret) + assert_equal("", src) + assert_equal("abc?def", dst) + end + + def test_invalid_ignore + ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_IGNORE) + ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) + assert_equal(:finished, ret) + assert_equal("", src) + assert_equal("abcdef", dst) + end + + def test_undef_replace + ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_REPLACE) + ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) + assert_equal(:finished, ret) + assert_equal("", src) + assert_equal("abc?def", dst) + end + + def test_undef_ignore + ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_IGNORE) + ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) + assert_equal(:finished, ret) + assert_equal("", src) + assert_equal("abcdef", dst) + end + + end Index: transcode.c =================================================================== --- transcode.c (revision 18786) +++ transcode.c (revision 18787) @@ -21,10 +21,6 @@ VALUE rb_cEncodingConverter; static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace; -#define INVALID_IGNORE 0x1 -#define INVALID_REPLACE 0x2 -#define UNDEF_IGNORE 0x10 -#define UNDEF_REPLACE 0x20 /* * Dispatch data and logic @@ -972,8 +968,8 @@ return econv_source_buffer_empty; } -rb_econv_result_t -rb_econv_convert(rb_econv_t *ec, +static rb_econv_result_t +rb_econv_convert0(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags) @@ -1051,6 +1047,47 @@ return res; } +static int output_replacement_character(rb_econv_t *ec); + +rb_econv_result_t +rb_econv_convert(rb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + rb_econv_result_t ret; + +resume: + ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags); + + if (ret == econv_invalid_byte_sequence) { + /* deal with invalid byte sequence */ + /* todo: add more alternative behaviors */ + if (ec->flags&ECONV_INVALID_IGNORE) { + goto resume; + } + else if (ec->flags&ECONV_INVALID_REPLACE) { + if (output_replacement_character(ec) == 0) + goto resume; + } + } + + if (ret == econv_undefined_conversion) { + /* valid character in source encoding + * but no related character(s) in destination encoding */ + /* todo: add more alternative behaviors */ + if (ec->flags&ECONV_UNDEF_IGNORE) { + goto resume; + } + else if (ec->flags&ECONV_UNDEF_REPLACE) { + if (output_replacement_character(ec) == 0) + goto resume; + } + } + + return ret; +} + const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec) { @@ -1455,7 +1492,7 @@ int max_output; VALUE exc; - ec = rb_econv_open(from_encoding, to_encoding, 0); + ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK)); if (!ec) rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding); @@ -1464,35 +1501,18 @@ resume: ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, opt); + if (ret == econv_invalid_byte_sequence) { - /* deal with invalid byte sequence */ - /* todo: add more alternative behaviors */ - if (opt&INVALID_IGNORE) { - goto resume; - } - else if (opt&INVALID_REPLACE) { - if (output_replacement_character(ec) == 0) - goto resume; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); } if (ret == econv_undefined_conversion) { - /* valid character in from encoding - * but no related character(s) in to encoding */ - /* todo: add more alternative behaviors */ - if (opt&UNDEF_IGNORE) { - goto resume; - } - else if (opt&UNDEF_REPLACE) { - if (output_replacement_character(ec) == 0) - goto resume; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); } + if (ret == econv_destination_buffer_full) { more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop); goto resume; @@ -1520,7 +1540,7 @@ int max_output; VALUE exc; - ec = rb_econv_open(from_encoding, to_encoding, 0); + ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK)); if (!ec) rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding); @@ -1549,31 +1569,12 @@ ptr += p - &input_byte; switch (ret) { case econv_invalid_byte_sequence: - /* deal with invalid byte sequence */ - /* todo: add more alternative behaviors */ - if (opt&INVALID_IGNORE) { - break; - } - else if (opt&INVALID_REPLACE) { - if (output_replacement_character(ec) == 0) - break; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); break; case econv_undefined_conversion: - /* valid character in from encoding - * but no related character(s) in to encoding */ - /* todo: add more alternative behaviors */ - if (opt&UNDEF_IGNORE) { - break; - } - else if (opt&UNDEF_REPLACE) { - if (output_replacement_character(ec) == 0) - break; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); @@ -1632,10 +1633,10 @@ if (NIL_P(v)) { } else if (v==sym_ignore) { - options |= INVALID_IGNORE; + options |= ECONV_INVALID_IGNORE; } else if (v==sym_replace) { - options |= INVALID_REPLACE; + options |= ECONV_INVALID_REPLACE; v = rb_hash_aref(opt, sym_replace); } else { @@ -1645,10 +1646,10 @@ if (NIL_P(v)) { } else if (v==sym_ignore) { - options |= UNDEF_IGNORE; + options |= ECONV_UNDEF_IGNORE; } else if (v==sym_replace) { - options |= UNDEF_REPLACE; + options |= ECONV_UNDEF_REPLACE; } else { rb_raise(rb_eArgError, "unknown value for undefined character option"); @@ -2331,6 +2332,12 @@ rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0); rb_define_method(rb_cEncodingConverter, "primitive_insert_output", econv_primitive_insert_output, 1); rb_define_method(rb_cEncodingConverter, "primitive_putback", econv_primitive_putback, 1); + rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); + rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE)); + rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK)); + rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE)); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER)); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/