[前][次][番号順一覧][スレッド一覧]

ruby-changes:6934

From: akr <ko1@a...>
Date: Sat, 9 Aug 2008 15:02:20 +0900 (JST)
Subject: [ruby-changes:6934] Ruby:r18452 (trunk): * transcode_data.h (rb_transcoding): add fields for restartable

akr	2008-08-09 15:02:01 +0900 (Sat, 09 Aug 2008)

  New Revision: 18452

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18452

  Log:
    * transcode_data.h (rb_transcoding): add fields for restartable 
      transcoding.
      (rb_transcoder): add max_input field.
      from_unit_length field is renamed to input_unit_length.
    
    * tool/transcode-tblgen.rb: generate max_input field.
    
    * enc/trans/iso2022.erb.c: follow rb_transcoder change.
    
    * enc/trans/utf_16_32.erb.c: ditto.
    
    * transcode.c (PARTIAL_INPUT): new constant.
      (transcode_char_start): new function.
      (transcode_result_t): new type.
      (transcode_restartable): new function.
      (more_output_buffer): new function.
      (transcode_loop): use transcode_restartable.

  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/iso2022.erb.c
    trunk/enc/trans/utf_16_32.erb.c
    trunk/tool/transcode-tblgen.rb
    trunk/transcode.c
    trunk/transcode_data.h

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18451)
+++ ChangeLog	(revision 18452)
@@ -1,3 +1,23 @@
+Sat Aug  9 14:39:34 2008  Tanaka Akira  <akr@f...>
+
+	* transcode_data.h (rb_transcoding): add fields for restartable
+	  transcoding.
+	  (rb_transcoder): add max_input field.
+	  from_unit_length field is renamed to input_unit_length.
+
+	* tool/transcode-tblgen.rb: generate max_input field.
+
+	* enc/trans/iso2022.erb.c: follow rb_transcoder change.
+
+	* enc/trans/utf_16_32.erb.c: ditto.
+
+	* transcode.c (PARTIAL_INPUT): new constant.
+	  (transcode_char_start): new function.
+	  (transcode_result_t): new type.
+	  (transcode_restartable): new function.
+	  (more_output_buffer): new function.
+	  (transcode_loop): use transcode_restartable.
+
 Sat Aug  9 13:35:08 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* stable/ext/socket/socket.c (NI_MAXHOST, NI_MAXSERV): fixed invalid
Index: enc/trans/iso2022.erb.c
===================================================================
--- enc/trans/iso2022.erb.c	(revision 18451)
+++ enc/trans/iso2022.erb.c	(revision 18452)
@@ -57,7 +57,10 @@
 
 static const rb_transcoder
 rb_ISO_2022_JP_to_EUC_JP = {
-    "ISO-2022-JP", "EUC-JP", &iso2022jp_to_eucjp, 1, 3,
+    "ISO-2022-JP", "EUC-JP", &iso2022jp_to_eucjp,
+    1, /* input_unit_length */
+    3, /* max_input */
+    3, /* max_output */
     NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp
 };
 
@@ -129,7 +132,10 @@
 
 static const rb_transcoder
 rb_EUC_JP_to_ISO_2022_JP = {
-    "EUC-JP", "ISO-2022-JP", &eucjp_to_iso2022jp, 1, 5,
+    "EUC-JP", "ISO-2022-JP", &eucjp_to_iso2022jp,
+    1, /* input_unit_length */
+    3, /* max_input */
+    5, /* max_output */
     NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp
 };
 
Index: enc/trans/utf_16_32.erb.c
===================================================================
--- enc/trans/utf_16_32.erb.c	(revision 18451)
+++ enc/trans/utf_16_32.erb.c	(revision 18452)
@@ -231,7 +231,10 @@
 
 static const rb_transcoder
 rb_from_UTF_16BE = {
-    "UTF-16BE", "UTF-8", &from_UTF_16BE, 2, 4,
+    "UTF-16BE", "UTF-8", &from_UTF_16BE,
+    2, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_from_utf_16be
 };
 
@@ -252,7 +255,10 @@
 
 static const rb_transcoder
 rb_to_UTF_16BE = {
-    "UTF-8", "UTF-16BE", &to_UTF_16BE, 1, 4,
+    "UTF-8", "UTF-16BE", &to_UTF_16BE,
+    1, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_to_utf_16be
 };
 
@@ -265,13 +271,19 @@
 
 static const rb_transcoder
 rb_from_UTF_16LE = {
-    "UTF-16LE", "UTF-8", &from_UTF_16LE, 2, 4,
+    "UTF-16LE", "UTF-8", &from_UTF_16LE,
+    2, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_from_utf_16le
 };
 
 static const rb_transcoder
 rb_to_UTF_16LE = {
-    "UTF-8", "UTF-16LE", &to_UTF_16BE, 1, 4,
+    "UTF-8", "UTF-16LE", &to_UTF_16BE,
+    1, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_to_utf_16le
 };
 
@@ -284,13 +296,19 @@
 
 static const rb_transcoder
 rb_from_UTF_32BE = {
-    "UTF-32BE", "UTF-8", &from_UTF_32BE, 4, 4,
+    "UTF-32BE", "UTF-8", &from_UTF_32BE,
+    4, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_from_utf_32be
 };
 
 static const rb_transcoder
 rb_to_UTF_32BE = {
-    "UTF-8", "UTF-32BE", &to_UTF_16BE, 1, 4,
+    "UTF-8", "UTF-32BE", &to_UTF_16BE,
+    1, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_to_utf_32be
 };
 
@@ -303,13 +321,19 @@
 
 static const rb_transcoder
 rb_from_UTF_32LE = {
-    "UTF-32LE", "UTF-8", &from_UTF_32LE, 4, 4,
+    "UTF-32LE", "UTF-8", &from_UTF_32LE,
+    4, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_from_utf_32le
 };
 
 static const rb_transcoder
 rb_to_UTF_32LE = {
-    "UTF-8", "UTF-32LE", &to_UTF_16BE, 1, 4,
+    "UTF-8", "UTF-32LE", &to_UTF_16BE,
+    1, /* input_unit_length */
+    4, /* max_input */
+    4, /* max_output */
     NULL, NULL, NULL, &fun_so_to_utf_32le
 };
 
Index: transcode_data.h
===================================================================
--- transcode_data.h	(revision 18451)
+++ transcode_data.h	(revision 18452)
@@ -64,15 +64,28 @@
 			       or NULL if something else is being converted */
     unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
 
+    int resume_position;
+    const BYTE_LOOKUP *next_table;
+    int readlen;
+    union {
+        unsigned char ary[8]; /* max_input <= sizeof(ary) */
+        unsigned char *ptr; /* length is max_input */
+    } readbuf;
+
     unsigned char stateful[256]; /* opaque data for stateful encoding */
 } rb_transcoding;
+#define TRANSCODING_READBUF(tc) \
+    ((tc)->transcoder->max_input <= sizeof((tc)->readbuf.ary) ? \
+     (tc)->readbuf.ary : \
+     (tc)->readbuf.ptr)
 
 /* static structure, one per supported encoding pair */
 typedef struct rb_transcoder {
     const char *from_encoding;
     const char *to_encoding;
     const BYTE_LOOKUP *conv_tree_start;
-    int from_unit_length;
+    int input_unit_length;
+    int max_input;
     int max_output;
     VALUE (*func_ii)(rb_transcoding*, VALUE); /* info  -> info   */
     VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info   */
Index: tool/transcode-tblgen.rb
===================================================================
--- tool/transcode-tblgen.rb	(revision 18451)
+++ tool/transcode-tblgen.rb	(revision 18452)
@@ -101,6 +101,22 @@
     "\#<#{self.class}: #{self.to_s}>"
   end
 
+  def min_length
+    if @pat.empty?
+      nil
+    else
+      @pat.map {|seq| seq.length }.min
+    end
+  end
+
+  def max_length
+    if @pat.empty?
+      nil
+    else
+      @pat.map {|seq| seq.length }.max
+    end
+  end
+
   def emptyable?
     @pat.any? {|seq|
       seq.empty?
@@ -170,6 +186,10 @@
     ">"
   end
 
+  def max_input_length
+    @map.keys.map {|k| k.max_length }.max
+  end
+
   def empty_action
     @map.each {|ss, action|
       return action if ss.emptyable?
@@ -386,6 +406,8 @@
   }
   am = ActionMap.parse(h)
 
+  max_input = am.max_input_length
+
   if ValidEncoding[from]
     valid_encoding = StrSet.parse(ValidEncoding[from])
   else
@@ -394,7 +416,7 @@
 
   code = ''
   defined_name = am.generate_node(code, name, valid_encoding)
-  return defined_name, code
+  return defined_name, code, max_input
 end
 
 TRANSCODERS = []
@@ -411,16 +433,19 @@
     tree_name = "from_#{id_from}_to_#{id_to}"
   end
   map = encode_utf8(map)
-  real_tree_name, tree_code = transcode_compile_tree(tree_name, from, map)
+  real_tree_name, tree_code, max_input = transcode_compile_tree(tree_name, from, map)
   transcoder_name = "rb_#{tree_name}"
   TRANSCODERS << transcoder_name
-  from_unit_length = UnitLength[from]
+  input_unit_length = UnitLength[from]
   max_output = map.map {|k,v| String === v ? v.length/2 : 1 }.max
   transcoder_code = <<"End"
 static const rb_transcoder
 #{transcoder_name} = {
-    #{c_esc from}, #{c_esc to}, &#{real_tree_name}, #{from_unit_length}, #{max_output},
-    NULL, NULL,
+    #{c_esc from}, #{c_esc to}, &#{real_tree_name},
+    #{input_unit_length}, /* input_unit_length */
+    #{max_input}, /* max_input */
+    #{max_output}, /* max_output */
+    NULL, NULL, NULL, NULL, NULL
 };
 End
   tree_code + "\n" + transcoder_code
Index: transcode.c
===================================================================
--- transcode.c	(revision 18451)
+++ transcode.c	(revision 18452)
@@ -20,6 +20,7 @@
 #define INVALID_REPLACE 0x2
 #define UNDEF_IGNORE 0x10
 #define UNDEF_REPLACE 0x20
+#define PARTIAL_INPUT 0x100
 
 /*
  *  Dispatch data and logic
@@ -324,34 +325,117 @@
 /*
  *  Transcoding engine logic
  */
-static void
-transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
-	       const unsigned char *in_stop, unsigned char *out_stop,
-	       const rb_transcoder *my_transcoder,
-	       rb_transcoding *my_transcoding,
-	       const int opt)
+
+static const unsigned char *
+transcode_char_start(rb_transcoding *my_transcoding,
+                         const unsigned char **in_pos,
+                         const unsigned char *in_p,
+                         int readlen)
 {
-    const unsigned char *in_p = *in_pos;
-    unsigned char *out_p = *out_pos;
-    const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
+    const unsigned char *ptr;
+    if (in_p - *in_pos < readlen) {
+        int restlen = readlen - my_transcoding->readlen;
+        MEMCPY(TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen,
+               in_p - restlen, unsigned char, restlen);
+        my_transcoding->readlen = readlen;
+        ptr = TRANSCODING_READBUF(my_transcoding);
+    }
+    else {
+        ptr = in_p - readlen;
+    }
+    return ptr;
+}
+
+typedef enum {
+    transcode_invalid_input,
+    transcode_undefined_conversion,
+    transcode_obuf_full,
+    transcode_ibuf_empty,
+    transcode_finished,
+} transcode_result_t;
+
+static transcode_result_t
+transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
+                      const unsigned char *in_stop, unsigned char *out_stop,
+                      const rb_transcoder *my_transcoder,
+                      rb_transcoding *my_transcoding,
+                      const int opt)
+
+{
+    int unitlen = my_transcoder->input_unit_length;
+
+    const unsigned char *in_p;
+    unsigned char *out_p;
+    int readlen;
     const BYTE_LOOKUP *next_table;
-    const unsigned char *char_start;
-    VALUE next_info;
-    unsigned char next_byte;
-    unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
-    rb_encoding *to_encoding = rb_enc_find(my_transcoder->to_encoding);
 
-    while (in_p < in_stop) {
-	char_start = in_p;
-	next_table = conv_tree_start;
-	if (out_p >= out_s) {
-	    int len = (out_p - *out_pos);
-	    int new_len = (len + my_transcoder->max_output) * 2;
-	    *out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
-	    out_p = *out_pos + len;
-	    out_s = *out_pos + new_len - my_transcoder->max_output;
-	}
+    unsigned char empty_buf;
+    unsigned char *empty_ptr = &empty_buf;
+
+    if (!in_pos) {
+        in_pos = (const unsigned char **)&empty_ptr;
+        in_stop = empty_ptr;
+    }
+
+    if (!out_pos) {
+        out_pos = &empty_ptr;
+        out_stop = empty_ptr;
+    }
+
+    in_p = *in_pos;
+    out_p = *out_pos;
+    readlen = my_transcoding->readlen;
+    next_table = my_transcoding->next_table;
+
+#define SUSPEND(ret, num) \
+    do { \
+        my_transcoding->resume_position = (num); \
+        if (my_transcoding->readlen < readlen) \
+            MEMCPY(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \
+                   in_p - (readlen-my_transcoding->readlen), \
+                   unsigned char, \
+                   readlen-my_transcoding->readlen); \
+        *in_pos = in_p; \
+        *out_pos = out_p; \
+        my_transcoding->readlen = readlen; \
+        my_transcoding->next_table = next_table; \
+        return ret; \
+        resume_label ## num:; \
+    } while (0)
+
+    switch (my_transcoding->resume_position) {
+      case 0: break;
+      case 1: goto resume_label1;
+      case 2: goto resume_label2;
+      case 3: goto resume_label3;
+      case 4: goto resume_label4;
+      case 5: goto resume_label5;
+      case 6: goto resume_label6;
+      case 7: goto resume_label7;
+      case 8: goto resume_label8;
+      case 9: goto resume_label9;
+      case 10: goto resume_label10;
+      case 11: goto resume_label11;
+      case 12: goto resume_label12;
+      case 13: goto resume_label13;
+      case 14: goto resume_label14;
+    }
+
+    while (1) {
+        unsigned char next_byte;
+        VALUE next_info;
+
+        if (in_stop <= in_p) {
+            if (!(opt & PARTIAL_INPUT))
+                break;
+            SUSPEND(transcode_ibuf_empty, 7);
+            continue;
+        }
+
+        my_transcoding->readlen = readlen = 0;
+	next_table = my_transcoder->conv_tree_start;
 	next_byte = (unsigned char)*in_p++;
+        readlen++;
       follow_byte:
         if (next_byte < next_table->base[0] || next_table->base[1] < next_byte)
             next_info = INVALID;
@@ -361,32 +445,42 @@
         }
       follow_info:
 	switch (next_info & 0x1F) {
-	  case NOMAP:
+          case NOMAP: /* xxx: copy last byte only? */
+            while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, 3); }
 	    *out_p++ = next_byte;
 	    continue;
 	  case 0x00: case 0x04: case 0x08: case 0x0C:
 	  case 0x10: case 0x14: case 0x18: case 0x1C:
-	    if (in_p >= in_stop) {
-		/* todo: deal with the case of backtracking */
-		/* todo: deal with incomplete input (streaming) */
-		goto invalid;
+	    while (in_p >= in_stop) {
+                if (!(opt & PARTIAL_INPUT))
+                    goto invalid;
+                SUSPEND(transcode_ibuf_empty, 5);
 	    }
 	    next_byte = (unsigned char)*in_p++;
+            readlen++;
 	    next_table = (const BYTE_LOOKUP *)next_info;
 	    goto follow_byte;
 	    /* maybe rewrite the following cases to use fallthrough???? */
 	  case ZERObt: /* drop input */
 	    continue;
 	  case ONEbt:
+            while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, 9); }
 	    *out_p++ = getBT1(next_info);
 	    continue;
 	  case TWObt:
+            while (out_stop - out_p < 2) { SUSPEND(transcode_obuf_full, 10); }
 	    *out_p++ = getBT1(next_info);
 	    *out_p++ = getBT2(next_info);
 	    continue;
+	  case THREEbt:
+            while (out_stop - out_p < 3) { SUSPEND(transcode_obuf_full, 11); }
+	    *out_p++ = getBT1(next_info);
+	    *out_p++ = getBT2(next_info);
+	    *out_p++ = getBT3(next_info);
+	    continue;
 	  case FOURbt:
+            while (out_stop - out_p < 4) { SUSPEND(transcode_obuf_full, 12); }
 	    *out_p++ = getBT0(next_info);
-	  case THREEbt: /* fall through */
 	    *out_p++ = getBT1(next_info);
 	    *out_p++ = getBT2(next_info);
 	    *out_p++ = getBT3(next_info);
@@ -395,72 +489,247 @@
 	    next_info = (VALUE)(*my_transcoder->func_ii)(my_transcoding, next_info);
 	    goto follow_info;
 	  case FUNsi:
-	    next_info = (VALUE)(*my_transcoder->func_si)(my_transcoding, char_start, (size_t)(in_p-char_start));
-	    goto follow_info;
-	    break;
+            {
+                const unsigned char *char_start;
+                char_start = transcode_char_start(my_transcoding, in_pos, in_p, readlen);
+                next_info = (VALUE)(*my_transcoder->func_si)(my_transcoding, char_start, (size_t)readlen);
+                break;
+            }
 	  case FUNio:
+            while (out_stop - out_p < my_transcoder->max_output) { SUSPEND(transcode_obuf_full, 13); }
 	    out_p += (VALUE)(*my_transcoder->func_io)(my_transcoding, next_info, out_p);
 	    break;
 	  case FUNso:
-	    out_p += (VALUE)(*my_transcoder->func_so)(my_transcoding, char_start, (size_t)(in_p-char_start), out_p);
-	    break;
+            {
+                const unsigned char *char_start;
+                while (out_stop - out_p < my_transcoder->max_output) { SUSPEND(transcode_obuf_full, 14); }
+                char_start = transcode_char_start(my_transcoding, in_pos, in_p, readlen);
+                out_p += (VALUE)(*my_transcoder->func_so)(my_transcoding, char_start, (size_t)readlen, out_p);
+                break;
+            }
 	  case INVALID:
             {
-                int unitlen = my_transcoder->from_unit_length;
-                if (in_stop - char_start <= unitlen)
-                    in_p = in_stop;
-                else if (in_p - char_start <= unitlen)
-                    in_p = char_start + unitlen;
-                else
-                    in_p = char_start + ((in_p - char_start - 1) / unitlen) * unitlen;
+                if (readlen <= unitlen) {
+                    while ((opt & PARTIAL_INPUT) && readlen + (in_stop - in_p) < unitlen) {
+                        readlen += in_stop - in_p;
+                        in_p = in_stop;
+                        SUSPEND(transcode_ibuf_empty, 8);
+                    }
+                    if (readlen + (in_stop - in_p) <= unitlen)
+                        in_p = in_stop;
+                    else
+                        in_p += unitlen - readlen;
+                }
+                else {
+                    /* xxx: in_p may possibly be less than *in_pos here;
+                     * the caller may want to access readbuf.  */
+                    in_p += ((readlen - 1) / unitlen) * unitlen - readlen;
+                }
                 goto invalid;
             }
 	  case UNDEF:
 	    goto undef;
 	}
 	continue;
+
       invalid:
+        SUSPEND(transcode_invalid_input, 1);
+        continue;
+
+      undef:
+        SUSPEND(transcode_undefined_conversion, 2);
+        continue;
+    }
+
+    /* cleanup */
+    if (my_transcoder->finish_func) {
+	while (out_stop - out_p < my_transcoder->max_output) {
+            SUSPEND(transcode_obuf_full, 4);
+	}
+        out_p += my_transcoder->finish_func(my_transcoding, out_p);
+    }
+    while (1)
+        SUSPEND(transcode_finished, 6);
+#undef SUSPEND
+}
+
+static void
+more_output_buffer(
+        rb_transcoding *my_transcoding,
+        unsigned char **out_start_ptr,
+        unsigned char **out_pos,
+        unsigned char **out_stop_ptr)
+{
+    size_t len = (*out_pos - *out_start_ptr);
+    size_t new_len = (len + my_transcoding->transcoder->max_output) * 2;
+    *out_start_ptr = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
+    *out_pos = *out_start_ptr + len;
+    *out_stop_ptr = *out_start_ptr + new_len;
+}
+
+#if 1
+static void
+transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
+	       const unsigned char *in_stop, unsigned char *out_stop,
+	       const rb_transcoder *my_transcoder,
+	       rb_transcoding *my_transcoding,
+	       const int opt)
+{
+    transcode_result_t ret;
+    unsigned char *out_start = *out_pos;
+
+    my_transcoding->resume_position = 0;
+    my_transcoding->readlen = 0;
+
+    if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
+        my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);
+    }
+#define CLEANUP \
+    do { \
+        if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) \
+            xfree(my_transcoding->readbuf.ptr); \
+    } while(0)
+
+resume:
+    ret = transcode_restartable(in_pos, out_pos, in_stop, out_stop, my_transcoder, my_transcoding, opt);
+    if (ret == transcode_invalid_input) {
 	/* deal with invalid byte sequence */
 	/* todo: add more alternative behaviors */
 	if (opt&INVALID_IGNORE) {
-	    continue;
+            goto resume;
 	}
 	else if (opt&INVALID_REPLACE) {
-	    output_replacement_character(&out_p, to_encoding);
-	    continue;
+            if (out_stop - *out_pos < my_transcoder->max_output)
+                more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+	    output_replacement_character(out_pos, rb_enc_find(my_transcoder->to_encoding));
+            goto resume;
 	}
+        CLEANUP;
 	rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
-	continue;
-      undef:
+    }
+    if (ret == transcode_undefined_conversion) {
 	/* valid character in from encoding
 	 * but no related character(s) in to encoding */
 	/* todo: add more alternative behaviors */
 	if (opt&UNDEF_IGNORE) {
-	    continue;
+	    goto resume;
 	}
 	else if (opt&UNDEF_REPLACE) {
-	    output_replacement_character(&out_p, to_encoding);
-	    continue;
+            if (out_stop - *out_pos < my_transcoder->max_output)
+                more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+	    output_replacement_character(out_pos, rb_enc_find(my_transcoder->to_encoding));
+	    goto resume;
 	}
-	rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
-	continue;
+        CLEANUP;
+        rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
     }
-    /* cleanup */
-    if (my_transcoder->finish_func) {
-	if (out_p >= out_s) {
-	    int len = (out_p - *out_pos);
-	    int new_len = (len + my_transcoder->max_output) * 2;
-	    *out_pos = (*my_transcoding->flush_func)(my_transcoding, len, new_len);
-	    out_p = *out_pos + len;
-	    out_s = *out_pos + new_len - my_transcoder->max_output;
-	}
-        out_p += my_transcoder->finish_func(my_transcoding, out_p);
+    if (ret == transcode_obuf_full) {
+        more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+        goto resume;
     }
-    *in_pos  = in_p;
-    *out_pos = out_p;
+
+    CLEANUP;
+    return;
+#undef CLEANUP
 }
+#else
+/* sample transcode_loop implementation in byte-by-byte stream style */
+static void
+transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
+	       const unsigned char *in_stop, unsigned char *out_stop,
+	       const rb_transcoder *my_transcoder,
+	       rb_transcoding *my_transcoding,
+	       const int opt)
+{
+    transcode_result_t ret;
+    unsigned char *out_start = *out_pos;
+    const unsigned char *ptr;
 
+    my_transcoding->resume_position = 0;
+    my_transcoding->readlen = 0;
 
+    if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
+        my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);
+    }
+#define CLEANUP \
+    do { \
+        if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) \
+            xfree(my_transcoding->readbuf.ptr); \
+    } while(0)
+
+    ret = transcode_ibuf_empty;
+    ptr = *in_pos;
+    while (ret != transcode_finished) {
+        unsigned char input_byte;
+        const unsigned char *p = &input_byte;
+
+        if (ret == transcode_ibuf_empty) {
+            if (ptr < in_stop) {
+                input_byte = *ptr;
+                ret = transcode_restartable(&p, out_pos, p+1, out_stop, my_transcoder, my_transcoding, opt|PARTIAL_INPUT);
+            }
+            else {
+                ret = transcode_restartable(NULL, out_pos, NULL, out_stop, my_transcoder, my_transcoding, opt);
+            }
+        }
+        else {
+            ret = transcode_restartable(NULL, out_pos, NULL, out_stop, my_transcoder, my_transcoding, opt|PARTIAL_INPUT);
+        }
+        if (&input_byte != p)
+            ptr += p - &input_byte;
+        switch (ret) {
+          case transcode_invalid_input:
+            /* deal with invalid byte sequence */
+            /* todo: add more alternative behaviors */
+            if (opt&INVALID_IGNORE) {
+                break;
+            }
+            else if (opt&INVALID_REPLACE) {
+                if (out_stop - *out_pos < my_transcoder->max_output)
+                    more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+                output_replacement_character(out_pos, rb_enc_find(my_transcoder->to_encoding));
+                break;
+            }
+            CLEANUP;
+            rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
+            break;
+
+          case transcode_undefined_conversion:
+            /* valid character in from encoding
+             * but no related character(s) in to encoding */
+            /* todo: add more alternative behaviors */
+            if (opt&UNDEF_IGNORE) {
+                break;
+            }
+            else if (opt&UNDEF_REPLACE) {
+                if (out_stop - *out_pos < my_transcoder->max_output)
+                    more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+                output_replacement_character(out_pos, rb_enc_find(my_transcoder->to_encoding));
+                break;
+            }
+            CLEANUP;
+            rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
+            break;
+
+          case transcode_obuf_full:
+            more_output_buffer(my_transcoding, &out_start, out_pos, &out_stop);
+            break;
+
+          case transcode_ibuf_empty:
+            break;
+
+          case transcode_finished:
+            break;
+        }
+    }
+    CLEANUP;
+    *in_pos = in_stop;
+    return;
+#undef CLEANUP
+}
+#endif
+
+
 /*
  *  String-specific code
  */

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]