ruby-changes:7268

akr	2008-08-23 15:02:58 +0900 (Sat, 23 Aug 2008)

  New Revision: 18787

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18787

  Log:
    * include/ruby/encoding.h (ECONV_INVALID_MASK): defined.
      (ECONV_INVALID_IGNORE): defined.
      (ECONV_INVALID_REPLACE): defined.
      (ECONV_UNDEF_MASK): defined.
      (ECONV_UNDEF_IGNORE): defined.
      (ECONV_UNDEF_REPLACE): defined.
    
    * transcode.c (INVALID_IGNORE): removed.
      (INVALID_REPLACE): removed.
      (UNDEF_IGNORE): removed.
      (UNDEF_REPLACE): removed.
      (rb_econv_convert0): renamed from rb_econv_convert.
      (rb_econv_convert): defined to call rb_econv_convert0 with
      replace/ignore behavior moved from transcode_loop.
      (transcode_loop): replace/ignore behavior removed.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/test/ruby/test_econv.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 18786)
+++ include/ruby/encoding.h	(revision 18787)
@@ -277,10 +277,18 @@
 void rb_econv_binmode(rb_econv_t *ec);
 
 /* flags for rb_econv_open */
-#define ECONV_UNIVERSAL_NEWLINE_DECODER       0x100
-#define ECONV_CRLF_NEWLINE_ENCODER            0x200
-#define ECONV_CR_NEWLINE_ENCODER              0x400
+#define ECONV_INVALID_MASK                      0x000f
+#define ECONV_INVALID_IGNORE                    0x0001
+#define ECONV_INVALID_REPLACE                   0x0002
 
+#define ECONV_UNDEF_MASK                        0x00f0
+#define ECONV_UNDEF_IGNORE                      0x0010
+#define ECONV_UNDEF_REPLACE                     0x0020
+
+#define ECONV_UNIVERSAL_NEWLINE_DECODER         0x0100
+#define ECONV_CRLF_NEWLINE_ENCODER              0x0200
+#define ECONV_CR_NEWLINE_ENCODER                0x0400
+
 /* flags for rb_econv_convert */
 #define ECONV_PARTIAL_INPUT                   0x10000
 #define ECONV_OUTPUT_FOLLOWED_BY_INPUT        0x20000
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 18786)
+++ ChangeLog	(revision 18787)
@@ -1,3 +1,21 @@
+Sat Aug 23 14:59:32 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (ECONV_INVALID_MASK): defined.
+	  (ECONV_INVALID_IGNORE): defined.
+	  (ECONV_INVALID_REPLACE): defined.
+	  (ECONV_UNDEF_MASK): defined.
+	  (ECONV_UNDEF_IGNORE): defined.
+	  (ECONV_UNDEF_REPLACE): defined.
+
+	* transcode.c (INVALID_IGNORE): removed.
+	  (INVALID_REPLACE): removed.
+	  (UNDEF_IGNORE): removed.
+	  (UNDEF_REPLACE): removed.
+	  (rb_econv_convert0): renamed from rb_econv_convert.
+	  (rb_econv_convert): defined to call rb_econv_convert0 with
+	  replace/ignore behavior moved from transcode_loop.
+	  (transcode_loop): replace/ignore behavior removed.
+
 Sat Aug 23 11:23:05 2008  Tanaka Akira  <akr@f...>
 
 	* io.c (rb_io_extract_modeenc): check :textmode and :binmode in option
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 18786)
+++ test/ruby/test_econv.rb	(revision 18787)
@@ -448,4 +448,37 @@
     assert_equal(["abcdef", ""], [dst, src])
   end
 
+  def test_invalid_replace
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_REPLACE)
+    ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
+    assert_equal(:finished, ret)
+    assert_equal("", src)
+    assert_equal("abc?def", dst)
+  end
+
+  def test_invalid_ignore
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_IGNORE)
+    ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
+    assert_equal(:finished, ret)
+    assert_equal("", src)
+    assert_equal("abcdef", dst)
+  end
+
+  def test_undef_replace
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_REPLACE)
+    ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
+    assert_equal(:finished, ret)
+    assert_equal("", src)
+    assert_equal("abc?def", dst)
+  end
+
+  def test_undef_ignore
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_IGNORE)
+    ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
+    assert_equal(:finished, ret)
+    assert_equal("", src)
+    assert_equal("abcdef", dst)
+  end
+
+
 end
Index: transcode.c
===================================================================
--- transcode.c	(revision 18786)
+++ transcode.c	(revision 18787)
@@ -21,10 +21,6 @@
 VALUE rb_cEncodingConverter;
 
 static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
-#define INVALID_IGNORE                  0x1
-#define INVALID_REPLACE                 0x2
-#define UNDEF_IGNORE                    0x10
-#define UNDEF_REPLACE                   0x20
 
 /*
  *  Dispatch data and logic
@@ -972,8 +968,8 @@
     return econv_source_buffer_empty;
 }
 
-rb_econv_result_t
-rb_econv_convert(rb_econv_t *ec,
+static rb_econv_result_t
+rb_econv_convert0(rb_econv_t *ec,
     const unsigned char **input_ptr, const unsigned char *input_stop,
     unsigned char **output_ptr, unsigned char *output_stop,
     int flags)
@@ -1051,6 +1047,47 @@
     return res;
 }
 
+static int output_replacement_character(rb_econv_t *ec);
+
+rb_econv_result_t
+rb_econv_convert(rb_econv_t *ec,
+    const unsigned char **input_ptr, const unsigned char *input_stop,
+    unsigned char **output_ptr, unsigned char *output_stop,
+    int flags)
+{
+    rb_econv_result_t ret;
+
+resume:
+    ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
+
+    if (ret == econv_invalid_byte_sequence) {
+	/* deal with invalid byte sequence */
+	/* todo: add more alternative behaviors */
+	if (ec->flags&ECONV_INVALID_IGNORE) {
+            goto resume;
+	}
+	else if (ec->flags&ECONV_INVALID_REPLACE) {
+	    if (output_replacement_character(ec) == 0)
+                goto resume;
+	}
+    }
+
+    if (ret == econv_undefined_conversion) {
+	/* valid character in source encoding
+	 * but no related character(s) in destination encoding */
+	/* todo: add more alternative behaviors */
+	if (ec->flags&ECONV_UNDEF_IGNORE) {
+	    goto resume;
+	}
+	else if (ec->flags&ECONV_UNDEF_REPLACE) {
+	    if (output_replacement_character(ec) == 0)
+                goto resume;
+	}
+    }
+
+    return ret;
+}
+
 const char *
 rb_econv_encoding_to_insert_output(rb_econv_t *ec)
 {
@@ -1455,7 +1492,7 @@
     int max_output;
     VALUE exc;
 
-    ec = rb_econv_open(from_encoding, to_encoding, 0);
+    ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK));
     if (!ec)
         rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
 
@@ -1464,35 +1501,18 @@
 
 resume:
     ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, opt);
+
     if (ret == econv_invalid_byte_sequence) {
-	/* deal with invalid byte sequence */
-	/* todo: add more alternative behaviors */
-	if (opt&INVALID_IGNORE) {
-            goto resume;
-	}
-	else if (opt&INVALID_REPLACE) {
-	    if (output_replacement_character(ec) == 0)
-                goto resume;
-	}
         exc = make_econv_exception(ec);
         rb_econv_close(ec);
 	rb_exc_raise(exc);
     }
     if (ret == econv_undefined_conversion) {
-	/* valid character in from encoding
-	 * but no related character(s) in to encoding */
-	/* todo: add more alternative behaviors */
-	if (opt&UNDEF_IGNORE) {
-	    goto resume;
-	}
-	else if (opt&UNDEF_REPLACE) {
-	    if (output_replacement_character(ec) == 0)
-                goto resume;
-	}
         exc = make_econv_exception(ec);
         rb_econv_close(ec);
 	rb_exc_raise(exc);
     }
+
     if (ret == econv_destination_buffer_full) {
         more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
         goto resume;
@@ -1520,7 +1540,7 @@
     int max_output;
     VALUE exc;
 
-    ec = rb_econv_open(from_encoding, to_encoding, 0);
+    ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK));
     if (!ec)
         rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
 
@@ -1549,31 +1569,12 @@
             ptr += p - &input_byte;
         switch (ret) {
           case econv_invalid_byte_sequence:
-            /* deal with invalid byte sequence */
-            /* todo: add more alternative behaviors */
-            if (opt&INVALID_IGNORE) {
-                break;
-            }
-            else if (opt&INVALID_REPLACE) {
-                if (output_replacement_character(ec) == 0)
-                    break;
-            }
             exc = make_econv_exception(ec);
             rb_econv_close(ec);
             rb_exc_raise(exc);
             break;
 
           case econv_undefined_conversion:
-            /* valid character in from encoding
-             * but no related character(s) in to encoding */
-            /* todo: add more alternative behaviors */
-            if (opt&UNDEF_IGNORE) {
-                break;
-            }
-            else if (opt&UNDEF_REPLACE) {
-                if (output_replacement_character(ec) == 0)
-                    break;
-            }
             exc = make_econv_exception(ec);
             rb_econv_close(ec);
             rb_exc_raise(exc);
@@ -1632,10 +1633,10 @@
 	if (NIL_P(v)) {
 	}
 	else if (v==sym_ignore) {
-	    options |= INVALID_IGNORE;
+	    options |= ECONV_INVALID_IGNORE;
 	}
 	else if (v==sym_replace) {
-	    options |= INVALID_REPLACE;
+	    options |= ECONV_INVALID_REPLACE;
 	    v = rb_hash_aref(opt, sym_replace);
 	}
 	else {
@@ -1645,10 +1646,10 @@
 	if (NIL_P(v)) {
 	}
 	else if (v==sym_ignore) {
-	    options |= UNDEF_IGNORE;
+	    options |= ECONV_UNDEF_IGNORE;
 	}
 	else if (v==sym_replace) {
-	    options |= UNDEF_REPLACE;
+	    options |= ECONV_UNDEF_REPLACE;
 	}
 	else {
 	    rb_raise(rb_eArgError, "unknown value for undefined character option");
@@ -2331,6 +2332,12 @@
     rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
     rb_define_method(rb_cEncodingConverter, "primitive_insert_output", econv_primitive_insert_output, 1);
     rb_define_method(rb_cEncodingConverter, "primitive_putback", econv_primitive_putback, 1);
+    rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
+    rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE));
+    rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
+    rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
+    rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE));
+    rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
     rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
     rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT));
     rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/