ruby-changes:7090
From: akr <ko1@a...>
Date: Thu, 14 Aug 2008 15:12:41 +0900 (JST)
Subject: [ruby-changes:7090] Ruby:r18608 (trunk): * transcode_data.h (rb_trans_result_t): new enumeration constant:
akr 2008-08-14 15:12:27 +0900 (Thu, 14 Aug 2008)

  New Revision: 18608

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18608

  Log:
    * transcode_data.h (rb_trans_result_t): new enumeration constant:
      transcode_output_followed_by_input.

    * transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
      (transcode_restartable0): suspend when output followed by input if
      OUTPUT_FOLLOWED_BY_INPUT is specified.
      (trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
      (rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
      (econv_primitive_convert): return :output_followed_by_input for
      transcode_output_followed_by_input.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c
    trunk/transcode_data.h

Index: ChangeLog
===================================================================
--- ChangeLog (revision 18607)
+++ ChangeLog (revision 18608)
@@ -1,3 +1,16 @@
+Thu Aug 14 15:08:17 2008 Tanaka Akira <akr@f...>
+
+    * transcode_data.h (rb_trans_result_t): new enumeration constant:
+      transcode_output_followed_by_input.
+
+    * transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
+      (transcode_restartable0): suspend when output followed by input if
+      OUTPUT_FOLLOWED_BY_INPUT is specified.
+      (trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
+      (rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
+      (econv_primitive_convert): return :output_followed_by_input for
+      transcode_output_followed_by_input.
+
 Thu Aug 14 14:57:46 2008 Nobuyoshi Nakada <nobu@r...>
 
     * gc.c (getrusage_time): should return the value.
Index: transcode_data.h
===================================================================
--- transcode_data.h (revision 18607)
+++ transcode_data.h (revision 18608)
@@ -117,6 +117,7 @@
     transcode_obuf_full,
     transcode_ibuf_empty,
     transcode_finished,
+    transcode_output_followed_by_input,
 } rb_trans_result_t;
 
 typedef struct {
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb (revision 18607)
+++ test/ruby/test_econv.rb (revision 18608)
@@ -70,6 +70,19 @@
     assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
   end
 
+  def test_iso2022jp_outstream
+    ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
+    a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+    src << "a"; check_ec("a", "", :ibuf_empty, *a)
+    src << "\xA2"; check_ec("a", "", :ibuf_empty, *a)
+    src << "\xA4"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
+    src << "\xA1"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
+    src << "\xA2"; check_ec("a\e$B\"$!\"", "", :ibuf_empty, *a)
+    src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :ibuf_empty, *a)
+    src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :ibuf_empty, *a)
+    a[-1] = 0; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
+  end
+
   def test_invalid
     assert_econv("", :invalid_input, 100, ["UTF-8", "EUC-JP"], "\x80", "")
     assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
@@ -98,6 +111,16 @@
     check_ec("AB", "", :finished, *a)
   end
 
+  def test_errors2
+    ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
+    a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+    check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
+    check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
+    check_ec("A", "\x00B", :invalid_input, *a)
+    check_ec("AB", "", :output_followed_by_input, *a)
+    check_ec("AB", "", :finished, *a)
+  end
+
   def test_universal_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
     a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
@@ -118,4 +141,17 @@
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
     assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
   end
+
+  def test_output_followed_by_input
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
+    a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+    check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a)
+    check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a)
+    check_ec("abc", "\u{3042}def", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2", "def", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2d", "ef", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2de", "f", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2def", "", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2def", "", :finished, *a)
+  end
 end
Index: transcode.c
===================================================================
--- transcode.c (revision 18607)
+++ transcode.c (revision 18608)
@@ -29,6 +29,7 @@
 #define UNIVERSAL_NEWLINE 0x200
 #define CRLF_NEWLINE 0x400
 #define CR_NEWLINE 0x800
+#define OUTPUT_FOLLOWED_BY_INPUT 0x1000
 
 /*
  * Dispatch data and logic
@@ -403,6 +404,11 @@
         while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, num); } \
     } while (0)
 
+#define SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(num) \
+    if ((opt & OUTPUT_FOLLOWED_BY_INPUT) && *out_pos != out_p) { \
+        SUSPEND(transcode_output_followed_by_input, num); \
+    }
+
 #define next_table (tc->next_table)
 #define next_info (tc->next_info)
 #define next_byte (tc->next_byte)
@@ -434,9 +440,13 @@
       case 21: goto resume_label21;
       case 22: goto resume_label22;
       case 23: goto resume_label23;
+      case 24: goto resume_label24;
+      case 25: goto resume_label25;
+      case 26: goto resume_label26;
     }
 
     while (1) {
+        SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(24);
         if (in_stop <= in_p) {
             if (!(opt & PARTIAL_INPUT))
                 break;
@@ -462,6 +472,7 @@
             continue;
           case 0x00: case 0x04: case 0x08: case 0x0C:
           case 0x10: case 0x14: case 0x18: case 0x1C:
+            SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
             while (in_p >= in_stop) {
                 if (!(opt & PARTIAL_INPUT))
                     goto invalid;
@@ -536,6 +547,8 @@
             }
           case INVALID:
             if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
+                if (tc->recognized_len + (in_p - inchar_start) < unitlen)
+                    SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(26);
                 while ((opt & PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
                     in_p = in_stop;
                     SUSPEND(transcode_ibuf_empty, 8);
@@ -828,6 +841,12 @@
             f = flags;
             if (ts->num_finished != i)
                 f |= PARTIAL_INPUT;
+            if (i == 0 && (flags & OUTPUT_FOLLOWED_BY_INPUT)) {
+                start = 1;
+                flags &= ~OUTPUT_FOLLOWED_BY_INPUT;
+            }
+            if (i != 0)
+                f &= ~OUTPUT_FOLLOWED_BY_INPUT;
             iold = *ipp;
             oold = *opp;
             te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
@@ -837,6 +856,7 @@
             switch (res) {
               case transcode_invalid_input:
               case transcode_undefined_conversion:
+              case transcode_output_followed_by_input:
                 return i;
 
               case transcode_obuf_full:
@@ -859,7 +879,8 @@
     int flags)
 {
     int i;
-    int start, err_index;
+    int needreport_index;
+    int sweep_start;
 
     unsigned char empty_buf;
     unsigned char *empty_ptr = &empty_buf;
@@ -874,23 +895,60 @@
         output_stop = empty_ptr;
     }
 
-    err_index = -1;
+    if (ts->elems[0].last_result == transcode_output_followed_by_input)
+        ts->elems[0].last_result = transcode_ibuf_empty;
+
+    needreport_index = -1;
     for (i = ts->num_trans-1; 0 <= i; i--) {
-        if (ts->elems[i].last_result != transcode_ibuf_empty) {
-            err_index = i;
+        switch (ts->elems[i].last_result) {
+          case transcode_invalid_input:
+          case transcode_undefined_conversion:
+          case transcode_output_followed_by_input:
+          case transcode_finished:
+            sweep_start = i+1;
+            needreport_index = i;
+            goto found_needreport;
+
+          case transcode_obuf_full:
+          case transcode_ibuf_empty:
             break;
+
+          default:
+            rb_bug("unexpected transcode last result");
         }
     }
 
+    /* /^[io]+$/ is confirmed. but actually /^i*o*$/. */
+
+    if (ts->elems[ts->num_trans-1].last_result == transcode_obuf_full &&
+        (flags & OUTPUT_FOLLOWED_BY_INPUT)) {
+        rb_trans_result_t res;
+
+        res = rb_trans_conv(ts, NULL, NULL, output_ptr, output_stop,
+                (flags & ~OUTPUT_FOLLOWED_BY_INPUT)|PARTIAL_INPUT);
+
+        if (res == transcode_ibuf_empty)
+            return transcode_output_followed_by_input;
+        return res;
+    }
+
+    sweep_start = 0;
+
+found_needreport:
+
     do {
-        start = err_index + 1;
-        err_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, start);
-    } while (err_index != -1 && err_index != ts->num_trans-1);
+        needreport_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
+        sweep_start = needreport_index + 1;
+    } while (needreport_index != -1 && needreport_index != ts->num_trans-1);
 
     for (i = ts->num_trans-1; 0 <= i; i--) {
         if (ts->elems[i].last_result != transcode_ibuf_empty) {
             rb_trans_result_t res = ts->elems[i].last_result;
-            ts->elems[i].last_result = transcode_ibuf_empty;
+            if (res == transcode_invalid_input ||
+                res == transcode_undefined_conversion ||
+                res == transcode_output_followed_by_input) {
+                ts->elems[i].last_result = transcode_ibuf_empty;
+            }
             return res;
         }
     }
@@ -1558,6 +1616,7 @@
       case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
       case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
       case transcode_finished: return ID2SYM(rb_intern("finished"));
+      case transcode_output_followed_by_input: return ID2SYM(rb_intern("output_followed_by_input"));
       default: return INT2NUM(res); /* should not be reached */
     }
 }
@@ -1601,6 +1660,7 @@
     rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
     rb_define_method(rb_cEncodingConverter, "max_output", econv_max_output, 0);
     rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
+    rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));
     rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE", INT2FIX(UNIVERSAL_NEWLINE));
     rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE", INT2FIX(CRLF_NEWLINE));
     rb_define_const(rb_cEncodingConverter, "CR_NEWLINE", INT2FIX(CR_NEWLINE));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/