ruby-changes:7092
From: akr <ko1@a...>
Date: Thu, 14 Aug 2008 15:35:45 +0900 (JST)
Subject: [ruby-changes:7092] Ruby:r18610 (trunk): * transcode.c (rb_econv_conv): new function. it doesn't consume input
akr 2008-08-14 15:35:33 +0900 (Thu, 14 Aug 2008) New Revision: 18610 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18610 Log: * transcode.c (rb_econv_conv): new function. it don't consume input too much, even for multilevel conversion. (transcode_loop): use rb_econv_conv. (econv_primitive_convert): ditto. Modified files: trunk/ChangeLog trunk/test/ruby/test_econv.rb trunk/transcode.c Index: ChangeLog =================================================================== --- ChangeLog (revision 18609) +++ ChangeLog (revision 18610) @@ -1,3 +1,10 @@ +Thu Aug 14 15:34:10 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_conv): new function. it don't consume input + too much, even for multilevel conversion. + (transcode_loop): use rb_econv_conv. + (econv_primitive_convert): ditto. + Thu Aug 14 15:27:42 2008 Nobuyoshi Nakada <nobu@r...> * include/ruby/defines.h (RUBY_ALIAS_FUNCTION): fallback definition. Index: test/ruby/test_econv.rb =================================================================== --- test/ruby/test_econv.rb (revision 18609) +++ test/ruby/test_econv.rb (revision 18610) @@ -95,18 +95,38 @@ def test_invalid2 ec = Encoding::Converter.new("Shift_JIS", "EUC-JP") a = ["", "abc\xFFdef", ec, nil, 1] - check_ec("a", "def", :obuf_full, *a) - check_ec("ab", "def", :obuf_full, *a) + check_ec("a", "c\xFFdef", :obuf_full, *a) + check_ec("ab", "\xFFdef", :obuf_full, *a) check_ec("abc", "def", :invalid_input, *a) - check_ec("abcd", "", :obuf_full, *a) + check_ec("abcd", "f", :obuf_full, *a) check_ec("abcde", "", :obuf_full, *a) check_ec("abcdef", "", :finished, *a) end + def test_invalid3 + ec = Encoding::Converter.new("Shift_JIS", "EUC-JP") + a = ["", "abc\xFFdef", ec, nil, 10] + check_ec("abc", "def", :invalid_input, *a) + check_ec("abcdef", "", :finished, *a) + end + + def test_invalid4 + ec = Encoding::Converter.new("Shift_JIS", "EUC-JP") + a = ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT] + check_ec("a", 
"bc\xFFdef", :output_followed_by_input, *a) + check_ec("ab", "c\xFFdef", :output_followed_by_input, *a) + check_ec("abc", "\xFFdef", :output_followed_by_input, *a) + check_ec("abc", "def", :invalid_input, *a) + check_ec("abcd", "ef", :output_followed_by_input, *a) + check_ec("abcde", "f", :output_followed_by_input, *a) + check_ec("abcdef", "", :output_followed_by_input, *a) + check_ec("abcdef", "", :finished, *a) + end + def test_errors ec = Encoding::Converter.new("UTF-16BE", "EUC-JP") a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10] - check_ec("", "\x00B", :undefined_conversion, *a) + check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a) check_ec("A", "\x00B", :invalid_input, *a) # \xDC\x00 is invalid as UTF-16BE check_ec("AB", "", :finished, *a) end Index: transcode.c =================================================================== --- transcode.c (revision 18609) +++ transcode.c (revision 18610) @@ -955,6 +955,25 @@ return transcode_ibuf_empty; } +static rb_trans_result_t +rb_econv_conv(rb_trans_t *ts, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + rb_trans_result_t res; + + if ((flags & OUTPUT_FOLLOWED_BY_INPUT) || + ts->num_trans == 1) + return rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags); + + flags |= OUTPUT_FOLLOWED_BY_INPUT; + do { + res = rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags); + } while (res == transcode_output_followed_by_input); + return res; +} + static void rb_trans_close(rb_trans_t *ts) { @@ -1064,7 +1083,7 @@ max_output = last_tc->transcoder->max_output; resume: - ret = rb_trans_conv(ts, in_pos, in_stop, out_pos, out_stop, opt); + ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt); if (ret == transcode_invalid_input) { /* deal with invalid byte sequence */ /* todo: add more alternative behaviors */ @@ -1134,14 +1153,14 @@ if (ret == transcode_ibuf_empty) { if (ptr 
< in_stop) { input_byte = *ptr; - ret = rb_trans_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT); + ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT); } else { - ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, 0); + ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0); } } else { - ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT); + ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT); } if (&input_byte != p) ptr += p - &input_byte; @@ -1495,10 +1514,12 @@ * * possible flags: * Encoding::Converter::PARTIAL_INPUT # input buffer may be part of larger input + * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input * * possible results: * :invalid_input * :undefined_conversion + * :output_followed_by_input * :obuf_full * :ibuf_empty * :finished @@ -1527,6 +1548,8 @@ * primitive_convert stops conversion when one of following condition met. * - invalid byte sequence found in input buffer (:invalid_input) * - character not representable in output encoding (:undefined_conversion) + * - after some output is generated, before any input is consumed (:output_followed_by_input) + * this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified. * - output buffer is full (:obuf_full) * - input buffer is empty (:ibuf_empty) * this occur only when PARTIAL_INPUT is specified. @@ -1606,7 +1629,7 @@ op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset; os = op + output_bytesize; - res = rb_trans_conv(ts, &ip, is, &op, os, flags); + res = rb_econv_conv(ts, &ip, is, &op, os, flags); rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input)); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/