[前][次][番号順一覧][スレッド一覧]

ruby-changes:7111

From: akr <ko1@a...>
Date: Fri, 15 Aug 2008 00:57:03 +0900 (JST)
Subject: [ruby-changes:7111] Ruby:r18628 (trunk): * include/ruby/encoding.h (rb_econv_output): declared.

akr	2008-08-15 00:56:39 +0900 (Fri, 15 Aug 2008)

  New Revision: 18628

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18628

  Log:
    * include/ruby/encoding.h (rb_econv_output): declared.
    
    * transcode_data.h (rb_transcoder): add resetsize_func field.
    
    * enc/trans/iso2022.trans (iso2022jp_reset_sequence_size): defined.
      (rb_EUC_JP_to_ISO_2022_JP): provide resetsize_func.
    
    * tool/transcode-tblgen.rb: set NULL for resetsize_func.
    
    * transcode.c (rb_econv_output): new function for inserting output.
      (output_replacement_character): use rb_econv_output.
      (transcode_loop): check return value of
      output_replacement_character.

  Modified files:
    trunk/ChangeLog
    trunk/enc/trans/iso2022.trans
    trunk/include/ruby/encoding.h
    trunk/tool/transcode-tblgen.rb
    trunk/transcode.c
    trunk/transcode_data.h

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18627)
+++ include/ruby/encoding.h	(revision 18628)
@@ -230,11 +230,15 @@
     rb_encoding *destination_encoding;
 } rb_econv_t;
 
-rb_econv_t *rb_econv_open(const char *from, const char *to, int flags);
+rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int flags);
 rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
-    const unsigned char **input_ptr, const unsigned char *input_stop,
-    unsigned char **output_ptr, unsigned char *output_stop,
+    const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
+    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
     int flags);
+int rb_econv_output(rb_econv_t *ec,
+    const unsigned char *str, size_t len,
+    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
+    size_t *required_size);
 void rb_econv_close(rb_econv_t *ec);
 
 /* flags for rb_econv_open */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18627)
+++ ChangeLog	(revision 18628)
@@ -1,3 +1,19 @@
+Fri Aug 15 00:52:40 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (rb_econv_output): declared.
+
+	* transcode_data.h (rb_transcoder): add resetsize_func field.
+
+	* enc/trans/iso2022.trans (iso2022jp_reset_sequence_size): defined.
+	  (rb_EUC_JP_to_ISO_2022_JP): provide resetsize_func.
+
+	* tool/transcode-tblgen.rb: set NULL for resetsize_func.
+
+	* transcode.c (rb_econv_output): new function for inserting output.
+	  (output_replacement_character): use rb_econv_output.
+	  (transcode_loop): check return value of
+	  output_replacement_character.
+
 Thu Aug 14 23:47:21 2008  Tanaka Akira  <akr@f...>
 
 	* include/ruby/encoding.h (ECONV_UNIVERSAL_NEWLINE_DECODER): defined.
Index: enc/trans/iso2022.trans
===================================================================
--- enc/trans/iso2022.trans	(revision 18627)
+++ enc/trans/iso2022.trans	(revision 18628)
@@ -83,7 +83,7 @@
 
     if (t->stateful[0] == 0) {
         t->stateful[0] = 1; /* initialized flag */
-        t->stateful[1] = 1; /* ASCII mode */
+        t->stateful[1] = 1; /* G0 = ASCII */
     }
 
     if (l != t->stateful[1]) {
@@ -91,13 +91,13 @@
             *o++ = 0x1b;
             *o++ = '(';
             *o++ = 'B';
-            t->stateful[1] = 1;
+            t->stateful[1] = 1; /* G0 = ASCII */
         }
         else {
             *o++ = 0x1b;
             *o++ = '$';
             *o++ = 'B';
-            t->stateful[1] = 2;
+            t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */
         }
     }
 
@@ -113,6 +113,14 @@
 }
 
 static int
+iso2022jp_reset_sequence_size(rb_transcoding *t)
+{
+    if (t->stateful[1] == 2)
+        return 3;
+    return 0;
+}
+
+static int
 finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o)
 {
     unsigned char *output0 = o;
@@ -137,7 +145,8 @@
     3, /* max_input */
     5, /* max_output */
     NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp,
-    finish_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp
+    finish_eucjp_to_iso2022jp,
+    iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp
 };
 
 void
Index: transcode_data.h
===================================================================
--- transcode_data.h	(revision 18627)
+++ transcode_data.h	(revision 18628)
@@ -107,8 +107,9 @@
     VALUE (*func_si)(rb_transcoding*, const unsigned char*, size_t); /* start -> info   */
     int (*func_io)(rb_transcoding*, VALUE, const unsigned char*); /* info  -> output */
     int (*func_so)(rb_transcoding*, const unsigned char*, size_t, unsigned char*); /* start -> output */
+    int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
+    int (*resetsize_func)(rb_transcoding*); /* -> len */
     int (*resetstate_func)(rb_transcoding*, unsigned char*); /* -> output */
-    int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
 };
 
 void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
Index: tool/transcode-tblgen.rb
===================================================================
--- tool/transcode-tblgen.rb	(revision 18627)
+++ tool/transcode-tblgen.rb	(revision 18628)
@@ -449,7 +449,8 @@
     #{input_unit_length}, /* input_unit_length */
     #{max_input}, /* max_input */
     #{max_output}, /* max_output */
-    NULL, NULL, NULL, NULL, NULL, NULL
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL
 };
 End
   tree_code + "\n" + transcoder_code
Index: transcode.c
===================================================================
--- transcode.c	(revision 18627)
+++ transcode.c	(revision 18628)
@@ -937,6 +937,58 @@
     return res;
 }
 
+int
+rb_econv_output(rb_econv_t *ec,
+    const unsigned char *str, size_t len, /* string in destination encoding */
+    unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
+    size_t *required_size)
+{
+    size_t reset_len, total_len;
+    rb_transcoding *tc = ec->last_tc;
+    const rb_transcoder *tr = tc->transcoder;
+
+    /*
+     * Assumptions for stateful encodings:
+     *
+     * - str can be output in the reset state and doesn't change the state.
+     * - an extra state-changing sequence is acceptable if str itself
+     *   contains a state-changing sequence.
+     *
+     * Currently the replacement character for stateful encodings such as
+     * ISO-2022-JP is "?", which contains no state-changing sequence.
+     * So no extra state-changing sequence occurs when
+     * rb_econv_output is used for replacement characters.
+     *
+     * These assumptions may be removed in the future.
+     * Doing so requires scanning str for state-changing sequences.
+     */
+
+    reset_len = 0;
+    if (tr->resetsize_func) {
+        reset_len = tr->resetsize_func(tc);
+    }
+
+    total_len = reset_len + len;
+    if (total_len < len)
+        return -1;
+
+    if (required_size) {
+        *required_size = total_len;
+    }
+
+    if (destination_buffer_end - *destination_buffer_ptr < total_len)
+        return -1;
+
+    if (reset_len) {
+        *destination_buffer_ptr += tr->resetstate_func(tc, *destination_buffer_ptr);
+    }
+
+    memcpy(*destination_buffer_ptr, str, len);
+    *destination_buffer_ptr += len;
+
+    return 0;
+}
+
 void
 rb_econv_close(rb_econv_t *ec)
 {
@@ -968,58 +1020,40 @@
     *out_stop_ptr = *out_start_ptr + new_len;
 }
 
-static void
+static int
 output_replacement_character(
         VALUE destination,
         unsigned char *(*resize_destination)(VALUE, int, int),
-        rb_transcoding *tc,
+        rb_econv_t *ec,
         unsigned char **out_start_ptr,
         unsigned char **out_pos,
         unsigned char **out_stop_ptr)
 
 {
+    rb_transcoding *tc = ec->last_tc;
     const rb_transcoder *tr;
-    int max_output;
     rb_encoding *enc;
-    const char *replacement;
+    const unsigned char *replacement;
     int len;
+    size_t required_size;
 
     tr = tc->transcoder;
-    max_output = tr->max_output;
     enc = rb_enc_find(tr->to_encoding);
 
-    /*
-     * Assumption for stateful encoding:
-     *
-     * - The replacement character can be output on resetted state and doesn't
-     *   change the state.
-     * - it is acceptable that extra state changing sequence if the replacement
-     *   character contains a state changing sequence.
-     *
-     * Currently the replacement character for stateful encoding such as
-     * ISO-2022-JP is "?" and it has no state changing sequence.
-     * So the extra state changing sequence don't occur.
-     *
-     * Thease assumption may be removed in future.
-     * It needs to scan the replacement character to check
-     * state changing sequences in the replacement character.
-     */
+    replacement = (const unsigned char *)get_replacement_character(enc, &len);
 
-    if (tr->resetstate_func) {
-        if (*out_stop_ptr - *out_pos < max_output)
-            more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
-        *out_pos += tr->resetstate_func(tc, *out_pos);
-    }
+    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
+        return 0;
 
-    if (*out_stop_ptr - *out_pos < max_output)
-        more_output_buffer(destination, resize_destination, max_output, out_start_ptr, out_pos, out_stop_ptr);
+    if (required_size < len)
+        return -1; /* overflow */
 
-    replacement = get_replacement_character(enc, &len);
+    more_output_buffer(destination, resize_destination, required_size, out_start_ptr, out_pos, out_stop_ptr);
 
-    memcpy(*out_pos, replacement, len);
+    if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
+        return 0;
 
-    *out_pos += len;
-    return;
+    return -1;
 }
 
 #if 1
@@ -1054,8 +1088,8 @@
             goto resume;
 	}
 	else if (opt&INVALID_REPLACE) {
-	    output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
-            goto resume;
+	    if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
+                goto resume;
 	}
         rb_econv_close(ec);
 	rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
@@ -1068,8 +1102,8 @@
 	    goto resume;
 	}
 	else if (opt&UNDEF_REPLACE) {
-	    output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
-	    goto resume;
+	    if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
+                goto resume;
 	}
         rb_econv_close(ec);
         rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
@@ -1135,8 +1169,8 @@
                 break;
             }
             else if (opt&INVALID_REPLACE) {
-                output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
-                break;
+                if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
+                    break;
             }
             rb_econv_close(ec);
             rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
@@ -1150,8 +1184,8 @@
                 break;
             }
             else if (opt&UNDEF_REPLACE) {
-                output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
-                break;
+                if (output_replacement_character(destination, resize_destination, ec, &out_start, out_pos, &out_stop) == 0)
+                    break;
             }
             rb_econv_close(ec);
             rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]