ruby-changes:7356

akr	2008-08-27 01:09:29 +0900 (Wed, 27 Aug 2008)

  New Revision: 18875

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18875

  Log:
    * include/ruby/encoding.h (rb_econv_result_t): new enumeration constant:
      econv_incomplete_input.
    
    * io.c (finish_writeconv): check econv_incomplete_input.
    
    * transcode.c (transcode_restartable0): return econv_incomplete_input
      for unexpected end of source buffer.
      (trans_sweep): check econv_incomplete_input.
      (rb_trans_conv): ditto.
      (rb_econv_convert0): ditto.
      (rb_econv_convert): ditto.
      (transcode_loop): ditto.
      (make_econv_exception): change message for econv_incomplete_input.
      (econv_result_to_symbol): return :incomplete_input for
      econv_incomplete_input.
      (ecerr_incomplete_input): new method.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/io.c
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18874)
+++ include/ruby/encoding.h	(revision 18875)
@@ -203,6 +203,7 @@
     econv_source_buffer_empty,
     econv_finished,
     econv_output_followed_by_input,
+    econv_incomplete_input,
 } rb_econv_result_t;
 
 typedef struct {
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18874)
+++ ChangeLog	(revision 18875)
@@ -1,3 +1,22 @@
+Wed Aug 27 01:03:23 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (rb_econv_result_t): enumeration constant:
+	  econv_incomplete_input. 
+
+	* io.c (finish_writeconv): check econv_incomplete_input.
+
+	* transcode.c (transcode_restartable0): return econv_incomplete_input
+	  for unexpected end of source buffer.
+	  (trans_sweep): check econv_incomplete_input.
+	  (rb_trans_conv): ditto.
+	  (rb_econv_convert0): ditto.
+	  (rb_econv_convert): ditto.
+	  (transcode_loop): ditto.
+	  (make_econv_exception): change message for econv_incomplete_input.
+	  (econv_result_to_symbol): return :incomplete_input for
+	  econv_incomplete_input.
+	  (ecerr_incomplete_input): new method.
+
 Wed Aug 27 00:05:55 2008  Tanaka Akira  <akr@f...>
 
 	* include/ruby/io.h (rb_io_t): rename crbuf to cbuf.
Index: io.c
===================================================================
--- io.c	(revision 18874)
+++ io.c	(revision 18875)
@@ -2967,7 +2967,7 @@
             de = buf + sizeof(buf);
             res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0);
             while (dp-ds) {
-retry:
+              retry:
                 r = rb_write_internal(fptr->fd, ds, dp-ds);
                 if (r == dp-ds)
                     break;
@@ -2987,6 +2987,7 @@
                 rb_econv_check_error(fptr->writeconv);
             }
             if (res == econv_invalid_byte_sequence ||
+                res == econv_incomplete_input ||
                 res == econv_undefined_conversion) {
                 break;
             }
@@ -3009,6 +3010,7 @@
             rb_econv_check_error(fptr->writeconv);
         }
         if (res == econv_invalid_byte_sequence ||
+            res == econv_incomplete_input ||
             res == econv_undefined_conversion) {
             break;
         }
@@ -3603,7 +3605,7 @@
         }
     }
 
-finished:
+  finished:
     if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE))
         goto error;
 
@@ -7051,7 +7053,7 @@
         }
     }
 
-retry_sendfile:
+  retry_sendfile:
     if (use_pread) {
         ss = simple_sendfile(stp->dst_fd, stp->src_fd, &src_offset, copy_length);
     }
@@ -7095,7 +7097,7 @@
 copy_stream_read(struct copy_stream_struct *stp, char *buf, int len, off_t offset)
 {
     ssize_t ss;
-retry_read:
+  retry_read:
     if (offset == (off_t)-1)
         ss = read(stp->src_fd, buf, len);
     else {
@@ -7231,7 +7233,7 @@
     copy_stream_read_write(stp);
 
 #ifdef USE_SENDFILE
-finish:
+  finish:
 #endif
     return Qnil;
 }
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18874)
+++ test/ruby/test_econv.rb	(revision 18875)
@@ -364,7 +364,7 @@
   def test_errinfo_invalid_partial_character
     ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
     ec.primitive_convert(src="\xa4", dst="", nil, 10)
-    assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xA4", "", nil, ec)
+    assert_errinfo(:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", nil, ec)
   end
 
   def test_errinfo_valid_partial_character
@@ -426,8 +426,20 @@
     assert_equal("UTF-8", err.destination_encoding)
     assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
     assert_equal("d", err.readagain_bytes)
+    assert_equal(false, err.incomplete_input?)
   end
 
+  def test_exc_incomplete
+    err = assert_raise(Encoding::InvalidByteSequence) {
+      "abc\xa4".encode("ISO-8859-1", "EUC-JP")
+    }
+    assert_equal("EUC-JP", err.source_encoding)
+    assert_equal("UTF-8", err.destination_encoding)
+    assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
+    assert_equal(nil, err.readagain_bytes)
+    assert_equal(true, err.incomplete_input?)
+  end
+
   def test_exc_undef
     err = assert_raise(Encoding::ConversionUndefined) {
       "abc\xa4\xa2def".encode("ISO-8859-1", "EUC-JP")
Index: transcode.c
===================================================================
--- transcode.c	(revision 18874)
+++ transcode.c	(revision 18875)
@@ -466,6 +466,7 @@
       case 24: goto resume_label24;
       case 25: goto resume_label25;
       case 26: goto resume_label26;
+      case 27: goto resume_label27;
     }
 
     while (1) {
@@ -500,7 +501,7 @@
             SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(25);
 	    while (in_p >= in_stop) {
                 if (!(opt & ECONV_PARTIAL_INPUT))
-                    goto invalid;
+                    goto incomplete;
                 SUSPEND(econv_source_buffer_empty, 5);
 	    }
 	    next_byte = (unsigned char)*in_p++;
@@ -602,6 +603,10 @@
         SUSPEND(econv_invalid_byte_sequence, 1);
         continue;
 
+      incomplete:
+        SUSPEND(econv_incomplete_input, 27);
+        continue;
+
       undef:
         SUSPEND(econv_undefined_conversion, 2);
         continue;
@@ -949,6 +954,7 @@
 
             switch (res) {
               case econv_invalid_byte_sequence:
+              case econv_incomplete_input:
               case econv_undefined_conversion:
               case econv_output_followed_by_input:
                 return i;
@@ -997,6 +1003,7 @@
     for (i = ec->num_trans-1; 0 <= i; i--) {
         switch (ec->elems[i].last_result) {
           case econv_invalid_byte_sequence:
+          case econv_incomplete_input:
           case econv_undefined_conversion:
           case econv_output_followed_by_input:
           case econv_finished:
@@ -1030,7 +1037,7 @@
 
     sweep_start = 0;
 
-found_needreport:
+  found_needreport:
 
     do {
         needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
@@ -1041,6 +1048,7 @@
         if (ec->elems[i].last_result != econv_source_buffer_empty) {
             rb_econv_result_t res = ec->elems[i].last_result;
             if (res == econv_invalid_byte_sequence ||
+                res == econv_incomplete_input ||
                 res == econv_undefined_conversion ||
                 res == econv_output_followed_by_input) {
                 ec->elems[i].last_result = econv_source_buffer_empty;
@@ -1160,10 +1168,11 @@
         } while (res == econv_output_followed_by_input);
     }
 
-gotresult:
+  gotresult:
     ec->last_error.result = res;
     ec->last_error.partial_input = flags & ECONV_PARTIAL_INPUT;
     if (res == econv_invalid_byte_sequence ||
+        res == econv_incomplete_input ||
         res == econv_undefined_conversion) {
         rb_transcoding *error_tc = ec->elems[result_position].tc;
         ec->last_error.error_tc = error_tc;
@@ -1200,10 +1209,11 @@
         output_stop = empty_ptr;
     }
 
-resume:
+  resume:
     ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
 
-    if (ret == econv_invalid_byte_sequence) {
+    if (ret == econv_invalid_byte_sequence ||
+        ret == econv_incomplete_input) {
 	/* deal with invalid byte sequence */
 	/* todo: add more alternative behaviors */
 	if (ec->opts.flags&ECONV_INVALID_IGNORE) {
@@ -1398,7 +1408,7 @@
         xfree((void*)insert_str);
     return 0;
 
-fail:
+  fail:
     if (insert_str != str)
         xfree((void*)insert_str);
     return -1;
@@ -1620,7 +1630,8 @@
 make_econv_exception(rb_econv_t *ec)
 {
     VALUE mesg, exc;
-    if (ec->last_error.result == econv_invalid_byte_sequence) {
+    if (ec->last_error.result == econv_invalid_byte_sequence ||
+        ec->last_error.result == econv_incomplete_input) {
         const char *err = (const char *)ec->last_error.error_bytes_start;
         size_t error_len = ec->last_error.error_bytes_len;
         VALUE bytes = rb_str_new(err, error_len);
@@ -1628,7 +1639,12 @@
         size_t readagain_len = ec->last_error.readagain_len;
         VALUE bytes2 = Qnil;
         VALUE dumped2;
-        if (readagain_len) {
+        if (ec->last_error.result == econv_incomplete_input) {
+            mesg = rb_sprintf("incomplete input: %s on %s",
+                    StringValueCStr(dumped),
+                    ec->last_error.source_encoding);
+        }
+        else if (readagain_len) {
             bytes2 = rb_str_new(err+error_len, readagain_len);
             dumped2 = rb_str_dump(bytes2);
             mesg = rb_sprintf("invalid byte sequence: %s followed by %s on %s",
@@ -1647,6 +1663,7 @@
         rb_ivar_set(exc, rb_intern("destination_encoding"), rb_str_new2(ec->last_error.destination_encoding));
         rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
         rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
+        rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
         return exc;
     }
     if (ec->last_error.result == econv_undefined_conversion) {
@@ -1742,10 +1759,11 @@
     last_tc = ec->last_tc;
     max_output = last_tc ? last_tc->transcoder->max_output : 1;
 
-resume:
+  resume:
     ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
 
-    if (ret == econv_invalid_byte_sequence) {
+    if (ret == econv_invalid_byte_sequence ||
+        ret == econv_incomplete_input) {
         exc = make_econv_exception(ec);
         rb_econv_close(ec);
 	rb_exc_raise(exc);
@@ -1812,6 +1830,7 @@
             ptr += p - &input_byte;
         switch (ret) {
           case econv_invalid_byte_sequence:
+          case econv_incomplete_input:
             exc = make_econv_exception(ec);
             rb_econv_close(ec);
             rb_exc_raise(exc);
@@ -2291,6 +2310,7 @@
 {
     switch (res) {
       case econv_invalid_byte_sequence: return ID2SYM(rb_intern("invalid_byte_sequence"));
+      case econv_incomplete_input: return ID2SYM(rb_intern("incomplete_input"));
       case econv_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion"));
       case econv_destination_buffer_full: return ID2SYM(rb_intern("destination_buffer_full"));
       case econv_source_buffer_empty: return ID2SYM(rb_intern("source_buffer_empty"));
@@ -2311,6 +2331,7 @@
  *
  * possible results:
  *    :invalid_byte_sequence
+ *    :incomplete_input
  *    :undefined_conversion
  *    :output_followed_by_input
  *    :destination_buffer_full
@@ -2342,6 +2363,8 @@
  *
  * primitive_convert stops conversion when one of following condition met.
  * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
+ * - unexpected end of source buffer (:incomplete_input)
+ *   this occur only when PARTIAL_INPUT is not specified.
  * - character not representable in output encoding (:undefined_conversion)
  * - after some output is generated, before input is done (:output_followed_by_input)
  *   this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
@@ -2451,7 +2474,7 @@
  * for primitive_convert.
  *
  * Other elements are only meaningful when result is
- * :invalid_byte_sequence or :undefined_conversion.
+ * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
  *
  * enc1 and enc2 indicats a conversion step as pair of strings.
  * For example, EUC-JP to ISO-8859-1 is
@@ -2482,7 +2505,7 @@
  *   ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
  *   ec.primitive_convert(src="\xa4", dst="", nil, 10)
  *   p ec.primitive_errinfo
- *   #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xA4", "", nil]
+ *   #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", nil]
  *
  *   # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
  *   # partial characters.
@@ -2625,6 +2648,12 @@
     return rb_attr_get(self, rb_intern("readagain_bytes"));
 }
 
+static VALUE
+ecerr_incomplete_input(VALUE self)
+{
+    return rb_attr_get(self, rb_intern("incomplete_input"));
+}
+
 extern void Init_newline(void);
 
 void
@@ -2674,6 +2703,7 @@
     rb_define_method(rb_eInvalidByteSequence, "destination_encoding", ecerr_destination_encoding, 0);
     rb_define_method(rb_eInvalidByteSequence, "error_bytes", ecerr_error_bytes, 0);
     rb_define_method(rb_eInvalidByteSequence, "readagain_bytes", ecerr_readagain_bytes, 0);
+    rb_define_method(rb_eInvalidByteSequence, "incomplete_input?", ecerr_incomplete_input, 0);
 
     Init_newline();
 }

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/