[前][次][番号順一覧][スレッド一覧]

ruby-changes:7090

From: akr <ko1@a...>
Date: Thu, 14 Aug 2008 15:12:41 +0900 (JST)
Subject: [ruby-changes:7090] Ruby:r18608 (trunk): * transcode_data.h (rb_trans_result_t): new enumeration constant:

akr	2008-08-14 15:12:27 +0900 (Thu, 14 Aug 2008)

  New Revision: 18608

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18608

  Log:
    * transcode_data.h (rb_trans_result_t): new enumeration constant:
      transcode_output_followed_by_input.
    
    * transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
      (transcode_restartable0): suspend when output is followed by input if
      OUTPUT_FOLLOWED_BY_INPUT is specified.
      (trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
      (rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
      (econv_primitive_convert): return :output_followed_by_input for
      transcode_output_followed_by_input.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c
    trunk/transcode_data.h

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18607)
+++ ChangeLog	(revision 18608)
@@ -1,3 +1,16 @@
+Thu Aug 14 15:08:17 2008  Tanaka Akira  <akr@f...>
+
+	* transcode_data.h (rb_trans_result_t): new enumeration constant:
+	  transcode_output_followed_by_input.
+
+	* transcode.c (OUTPUT_FOLLOWED_BY_INPUT): new flag.
+	  (transcode_restartable0): suspend when output followed by input if
+	  OUTPUT_FOLLOWED_BY_INPUT is specified.
+	  (trans_sweep): check OUTPUT_FOLLOWED_BY_INPUT.
+	  (rb_trans_conv): support OUTPUT_FOLLOWED_BY_INPUT.
+	  (econv_primitive_convert): return :output_followed_by_input for
+	  transcode_output_followed_by_input.
+
 Thu Aug 14 14:57:46 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* gc.c (getrusage_time): should return the value.
Index: transcode_data.h
===================================================================
--- transcode_data.h	(revision 18607)
+++ transcode_data.h	(revision 18608)
@@ -117,6 +117,7 @@
     transcode_obuf_full,
     transcode_ibuf_empty,
     transcode_finished,
+    transcode_output_followed_by_input,
 } rb_trans_result_t;
 
 typedef struct {
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18607)
+++ test/ruby/test_econv.rb	(revision 18608)
@@ -70,6 +70,19 @@
     assert_econv("", :finished, 100, ["Shift_JIS", "ISO-2022-JP"], "", "")
   end
 
+  def test_iso2022jp_outstream
+    ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
+    a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
+    src << "a";        check_ec("a",                           "", :ibuf_empty, *a)
+    src << "\xA2";     check_ec("a",                           "", :ibuf_empty, *a)
+    src << "\xA4";     check_ec("a\e$B\"$",                    "", :ibuf_empty, *a)
+    src << "\xA1";     check_ec("a\e$B\"$",                    "", :ibuf_empty, *a)
+    src << "\xA2";     check_ec("a\e$B\"$!\"",                 "", :ibuf_empty, *a)
+    src << "b";        check_ec("a\e$B\"$!\"\e(Bb",            "", :ibuf_empty, *a)
+    src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&",     "", :ibuf_empty, *a)
+    a[-1] = 0;         check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
+  end
+
   def test_invalid
     assert_econv("", :invalid_input,    100, ["UTF-8", "EUC-JP"], "\x80", "")
     assert_econv("a", :invalid_input,   100, ["UTF-8", "EUC-JP"], "a\x80", "")
@@ -98,6 +111,16 @@
     check_ec("AB",                         "", :finished, *a)
   end
 
+  def test_errors2
+    ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
+    a =     ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+    check_ec("",         "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
+    check_ec("A",             "\xDC\x00\x00B", :output_followed_by_input, *a)
+    check_ec("A",                     "\x00B", :invalid_input, *a)
+    check_ec("AB",                         "", :output_followed_by_input, *a)
+    check_ec("AB",                         "", :finished, *a)
+  end
+
   def test_universal_newline
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
     a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
@@ -118,4 +141,17 @@
     ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
     assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
   end
+
+  def test_output_followed_by_input
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
+    a =     ["",  "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+    check_ec("a",  "bc\u{3042}def", :output_followed_by_input, *a)
+    check_ec("ab",  "c\u{3042}def", :output_followed_by_input, *a)
+    check_ec("abc",  "\u{3042}def", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2",  "def", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2d",  "ef", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2de",  "f", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2def",  "", :output_followed_by_input, *a)
+    check_ec("abc\xA4\xA2def",  "", :finished, *a)
+  end
 end
Index: transcode.c
===================================================================
--- transcode.c	(revision 18607)
+++ transcode.c	(revision 18608)
@@ -29,6 +29,7 @@
 #define UNIVERSAL_NEWLINE       0x200
 #define CRLF_NEWLINE            0x400
 #define CR_NEWLINE              0x800
+#define OUTPUT_FOLLOWED_BY_INPUT   0x1000
 
 /*
  *  Dispatch data and logic
@@ -403,6 +404,11 @@
         while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, num); } \
     } while (0)
 
+#define SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(num) \
+    if ((opt & OUTPUT_FOLLOWED_BY_INPUT) && *out_pos != out_p) { \
+        SUSPEND(transcode_output_followed_by_input, num); \
+    }
+
 #define next_table (tc->next_table)
 #define next_info (tc->next_info)
 #define next_byte (tc->next_byte)
@@ -434,9 +440,13 @@
       case 21: goto resume_label21;
       case 22: goto resume_label22;
       case 23: goto resume_label23;
+      case 24: goto resume_label24;
+      case 25: goto resume_label25;
+      case 26: goto resume_label26;
     }
 
     while (1) {
+        SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(24);
         if (in_stop <= in_p) {
             if (!(opt & PARTIAL_INPUT))
                 break;
@@ -462,6 +472,7 @@
 	    continue;
 	  case 0x00: case 0x04: case 0x08: case 0x0C:
 	  case 0x10: case 0x14: case 0x18: case 0x1C:
+            SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
 	    while (in_p >= in_stop) {
                 if (!(opt & PARTIAL_INPUT))
                     goto invalid;
@@ -536,6 +547,8 @@
             }
 	  case INVALID:
             if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
+                if (tc->recognized_len + (in_p - inchar_start) < unitlen)
+                    SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(26);
                 while ((opt & PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
                     in_p = in_stop;
                     SUSPEND(transcode_ibuf_empty, 8);
@@ -828,6 +841,12 @@
             f = flags;
             if (ts->num_finished != i)
                 f |= PARTIAL_INPUT;
+            if (i == 0 && (flags & OUTPUT_FOLLOWED_BY_INPUT)) {
+                start = 1;
+                flags &= ~OUTPUT_FOLLOWED_BY_INPUT;
+            }
+            if (i != 0)
+                f &= ~OUTPUT_FOLLOWED_BY_INPUT;
             iold = *ipp;
             oold = *opp;
             te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
@@ -837,6 +856,7 @@
             switch (res) {
               case transcode_invalid_input:
               case transcode_undefined_conversion:
+              case transcode_output_followed_by_input:
                 return i;
 
               case transcode_obuf_full:
@@ -859,7 +879,8 @@
     int flags)
 {
     int i;
-    int start, err_index;
+    int needreport_index;
+    int sweep_start;
 
     unsigned char empty_buf;
     unsigned char *empty_ptr = &empty_buf;
@@ -874,23 +895,60 @@
         output_stop = empty_ptr;
     }
 
-    err_index = -1;
+    if (ts->elems[0].last_result == transcode_output_followed_by_input)
+        ts->elems[0].last_result = transcode_ibuf_empty;
+
+    needreport_index = -1;
     for (i = ts->num_trans-1; 0 <= i; i--) {
-        if (ts->elems[i].last_result != transcode_ibuf_empty) {
-            err_index = i;
+        switch (ts->elems[i].last_result) {
+          case transcode_invalid_input:
+          case transcode_undefined_conversion:
+          case transcode_output_followed_by_input:
+          case transcode_finished:
+            sweep_start = i+1;
+            needreport_index = i;
+            goto found_needreport;
+
+          case transcode_obuf_full:
+          case transcode_ibuf_empty:
             break;
+
+          default:
+            rb_bug("unexpected transcode last result");
         }
     }
 
+    /* /^[io]+$/ is confirmed.  but actually /^i*o*$/. */
+
+    if (ts->elems[ts->num_trans-1].last_result == transcode_obuf_full &&
+        (flags & OUTPUT_FOLLOWED_BY_INPUT)) {
+        rb_trans_result_t res;
+
+        res = rb_trans_conv(ts, NULL, NULL, output_ptr, output_stop,
+                (flags & ~OUTPUT_FOLLOWED_BY_INPUT)|PARTIAL_INPUT);
+
+        if (res == transcode_ibuf_empty)
+            return transcode_output_followed_by_input;
+        return res;
+    }
+
+    sweep_start = 0;
+
+found_needreport:
+
     do {
-        start = err_index + 1;
-        err_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, start);
-    } while (err_index != -1 && err_index != ts->num_trans-1);
+        needreport_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
+        sweep_start = needreport_index + 1;
+    } while (needreport_index != -1 && needreport_index != ts->num_trans-1);
 
     for (i = ts->num_trans-1; 0 <= i; i--) {
         if (ts->elems[i].last_result != transcode_ibuf_empty) {
             rb_trans_result_t res = ts->elems[i].last_result;
-            ts->elems[i].last_result = transcode_ibuf_empty;
+            if (res == transcode_invalid_input ||
+                res == transcode_undefined_conversion ||
+                res == transcode_output_followed_by_input) {
+                ts->elems[i].last_result = transcode_ibuf_empty;
+            }
             return res;
         }
     }
@@ -1558,6 +1616,7 @@
       case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
       case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
       case transcode_finished: return ID2SYM(rb_intern("finished"));
+      case transcode_output_followed_by_input: return ID2SYM(rb_intern("output_followed_by_input"));
       default: return INT2NUM(res); /* should not be reached */
     }
 }
@@ -1601,6 +1660,7 @@
     rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
     rb_define_method(rb_cEncodingConverter, "max_output", econv_max_output, 0);
     rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
+    rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));
     rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE", INT2FIX(UNIVERSAL_NEWLINE));
     rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE", INT2FIX(CRLF_NEWLINE));
     rb_define_const(rb_cEncodingConverter, "CR_NEWLINE", INT2FIX(CR_NEWLINE));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]