[前][次][番号順一覧][スレッド一覧]

ruby-changes:7092

From: akr <ko1@a...>
Date: Thu, 14 Aug 2008 15:35:45 +0900 (JST)
Subject: [ruby-changes:7092] Ruby:r18610 (trunk): * transcode.c (rb_econv_conv): new function. it doesn't consume input

akr	2008-08-14 15:35:33 +0900 (Thu, 14 Aug 2008)

  New Revision: 18610

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18610

  Log:
    * transcode.c (rb_econv_conv): new function.  it doesn't consume too
      much input, even for multilevel conversion.
      (transcode_loop): use rb_econv_conv.
      (econv_primitive_convert): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18609)
+++ ChangeLog	(revision 18610)
@@ -1,3 +1,10 @@
+Thu Aug 14 15:34:10 2008  Tanaka Akira  <akr@f...>
+
+	* transcode.c (rb_econv_conv): new function.  it don't consume input
+	  too much, even for multilevel conversion.
+	  (transcode_loop): use rb_econv_conv.
+	  (econv_primitive_convert): ditto.
+
 Thu Aug 14 15:27:42 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* include/ruby/defines.h (RUBY_ALIAS_FUNCTION): fallback definition.
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18609)
+++ test/ruby/test_econv.rb	(revision 18610)
@@ -95,18 +95,38 @@
   def test_invalid2
     ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
     a =     ["", "abc\xFFdef", ec, nil, 1]
-    check_ec("a",       "def", :obuf_full, *a)
-    check_ec("ab",      "def", :obuf_full, *a)
+    check_ec("a",  "c\xFFdef", :obuf_full, *a)
+    check_ec("ab",  "\xFFdef", :obuf_full, *a)
     check_ec("abc",     "def", :invalid_input, *a)
-    check_ec("abcd",       "", :obuf_full, *a)
+    check_ec("abcd",      "f", :obuf_full, *a)
     check_ec("abcde",      "", :obuf_full, *a)
     check_ec("abcdef",     "", :finished, *a)
   end
 
+  def test_invalid3
+    ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
+    a =     ["", "abc\xFFdef", ec, nil, 10]
+    check_ec("abc",     "def", :invalid_input, *a)
+    check_ec("abcdef",     "", :finished, *a)
+  end
+
+  def test_invalid4
+    ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
+    a =     ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
+    check_ec("a", "bc\xFFdef", :output_followed_by_input, *a)
+    check_ec("ab", "c\xFFdef", :output_followed_by_input, *a)
+    check_ec("abc", "\xFFdef", :output_followed_by_input, *a)
+    check_ec("abc",     "def", :invalid_input, *a)
+    check_ec("abcd",     "ef", :output_followed_by_input, *a)
+    check_ec("abcde",     "f", :output_followed_by_input, *a)
+    check_ec("abcdef",     "", :output_followed_by_input, *a)
+    check_ec("abcdef",     "", :finished, *a)
+  end
+
   def test_errors
     ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
     a =     ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10]
-    check_ec("",                      "\x00B", :undefined_conversion, *a)
+    check_ec("",         "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
     check_ec("A",                     "\x00B", :invalid_input, *a) # \xDC\x00 is invalid as UTF-16BE
     check_ec("AB",                         "", :finished, *a)
   end
Index: transcode.c
===================================================================
--- transcode.c	(revision 18609)
+++ transcode.c	(revision 18610)
@@ -955,6 +955,25 @@
     return transcode_ibuf_empty;
 }
 
+static rb_trans_result_t
+rb_econv_conv(rb_trans_t *ts,
+    const unsigned char **input_ptr, const unsigned char *input_stop,
+    unsigned char **output_ptr, unsigned char *output_stop,
+    int flags)
+{
+    rb_trans_result_t res;
+
+    if ((flags & OUTPUT_FOLLOWED_BY_INPUT) ||
+        ts->num_trans == 1)
+        return rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags);
+
+    flags |= OUTPUT_FOLLOWED_BY_INPUT;
+    do {
+        res = rb_trans_conv(ts, input_ptr, input_stop, output_ptr, output_stop, flags);
+    } while (res == transcode_output_followed_by_input);
+    return res;
+}
+
 static void
 rb_trans_close(rb_trans_t *ts)
 {
@@ -1064,7 +1083,7 @@
     max_output = last_tc->transcoder->max_output;
 
 resume:
-    ret = rb_trans_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
+    ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
     if (ret == transcode_invalid_input) {
 	/* deal with invalid byte sequence */
 	/* todo: add more alternative behaviors */
@@ -1134,14 +1153,14 @@
         if (ret == transcode_ibuf_empty) {
             if (ptr < in_stop) {
                 input_byte = *ptr;
-                ret = rb_trans_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
+                ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
             }
             else {
-                ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, 0);
+                ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, 0);
             }
         }
         else {
-            ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
+            ret = rb_econv_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
         }
         if (&input_byte != p)
             ptr += p - &input_byte;
@@ -1495,10 +1514,12 @@
  *
  * possible flags:
  *   Encoding::Converter::PARTIAL_INPUT # input buffer may be part of larger input
+ *   Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
  *
  * possible results:
  *    :invalid_input
  *    :undefined_conversion
+ *    :output_followed_by_input
  *    :obuf_full
  *    :ibuf_empty
  *    :finished
@@ -1527,6 +1548,8 @@
  * primitive_convert stops conversion when one of following condition met.
  * - invalid byte sequence found in input buffer (:invalid_input)
  * - character not representable in output encoding (:undefined_conversion)
+ * - after some output is generated, before any input is consumed (:output_followed_by_input)
+ *   this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
  * - output buffer is full (:obuf_full)
  * - input buffer is empty (:ibuf_empty)
  *   this occur only when PARTIAL_INPUT is specified.
@@ -1606,7 +1629,7 @@
     op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
     os = op + output_bytesize;
 
-    res = rb_trans_conv(ts, &ip, is, &op, os, flags);
+    res = rb_econv_conv(ts, &ip, is, &op, os, flags);
     rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
     rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
 

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]