ruby-changes:7602

akr	2008-09-04 19:15:34 +0900 (Thu, 04 Sep 2008)

  New Revision: 19123

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19123

  Log:
    * include/ruby/encoding.h (ECONV_INVALID_IGNORE): removed because
      it tends to cause security problems.  If the behaviour is really
      required, ECONV_INVALID_REPLACE with an empty string can be used.
      For examples of such problems, see CVE-2006-2313, CVE-2008-1036,
      and [ruby-core:15645].
      (ECONV_UNDEF_IGNORE): ditto.
    
    * transcode.c (rb_econv_convert): follow the above change.
      (econv_opts): ditto.
      (Init_transcode): ditto.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/encoding.h
    trunk/test/ruby/test_econv.rb
    trunk/test/ruby/test_io_m17n.rb
    trunk/test/ruby/test_transcode.rb
    trunk/transcode.c

Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 19122)
+++ include/ruby/encoding.h	(revision 19123)
@@ -251,11 +251,9 @@
 
 /* flags for rb_econv_open */
 #define ECONV_INVALID_MASK                      0x000f
-#define ECONV_INVALID_IGNORE                    0x0001
 #define ECONV_INVALID_REPLACE                   0x0002
 
 #define ECONV_UNDEF_MASK                        0x00f0
-#define ECONV_UNDEF_IGNORE                      0x0010
 #define ECONV_UNDEF_REPLACE                     0x0020
 
 /* effective only if output is ascii compatible */
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19122)
+++ ChangeLog	(revision 19123)
@@ -1,3 +1,15 @@
+Thu Sep  4 19:10:27 2008  Tanaka Akira  <akr@f...>
+
+	* include/ruby/encoding.h (ECONV_INVALID_IGNORE): removed because
+	  it tend to cause security problem.  If the behaviour is really
+	  required, ECONV_INVALID_REPLACE with empty string can be used.
+	  For example, CVE-2006-2313, CVE-2008-1036, [ruby-core:15645]
+	  (ECONV_UNDEF_IGNORE): ditto.
+
+	* transcode.c (rb_econv_convert): follow the above change.
+	  (econv_opts): ditto.
+	  (Init_transcode): ditto.
+
 Thu Sep  4 13:22:02 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* vm_core.h (struct rb_vm_struct): replaced signal staff with trap
Index: test/ruby/test_transcode.rb
===================================================================
--- test/ruby/test_transcode.rb	(revision 19122)
+++ test/ruby/test_transcode.rb	(revision 19123)
@@ -247,23 +247,23 @@
   
   def test_invalid_ignore
     # arguments only
-    assert_nothing_raised { 'abc'.encode('utf-8', invalid: :ignore) }
+    assert_nothing_raised { 'abc'.encode('utf-8', invalid: :replace, replace: "") }
     # check handling of UTF-8 ill-formed subsequences
     assert_equal("\x00\x41\x00\x3E\x00\x42".force_encoding('UTF-16BE'),
-      "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+      "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: ""))
     assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'),
-      "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+      "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: ""))
     assert_equal("\x00\x42".force_encoding('UTF-16BE'),
-      "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+      "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: ""))
     assert_equal(''.force_encoding('UTF-16BE'),
-      "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore))
+      "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: ""))
 
     assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"),
-      "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore))
+      "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: ""))
     assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"),
-      "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore))
+      "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: ""))
     assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"),
-      "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore))
+      "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: ""))
   end
 
   def test_invalid_replace
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 19122)
+++ test/ruby/test_io_m17n.rb	(revision 19123)
@@ -1312,14 +1312,14 @@
       open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f|
         assert_equal("a?b", f.read)
       }
-      open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f|
+      open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f|
         assert_equal("ab", f.read)
       }
       open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f|
         assert_raise(Encoding::InvalidByteSequence) { f.read }
         assert_equal("b", f.read)
       }
-      open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f|
+      open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f|
         assert_raise(Encoding::InvalidByteSequence) { f.read }
         assert_equal("b", f.read)
       }
@@ -1332,14 +1332,14 @@
       open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f|
         assert_equal("a?b", f.read)
       }
-      open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f|
+      open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f|
         assert_equal("ab", f.read)
       }
       open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f|
         assert_raise(Encoding::ConversionUndefined) { f.read }
         assert_equal("b", f.read)
       }
-      open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f|
+      open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f|
         assert_raise(Encoding::ConversionUndefined) { f.read }
         assert_equal("b", f.read)
       }
@@ -1354,7 +1354,7 @@
       }
       assert_equal("a?b", File.read("t.txt"))
 
-      open("t.txt", "w:euc-jp", :invalid => :ignore) {|f|
+      open("t.txt", "w:euc-jp", :invalid => :replace, :replace => "") {|f|
         assert_nothing_raised { f.write invalid_utf8 }
       }
       assert_equal("ab", File.read("t.txt"))
@@ -1362,7 +1362,7 @@
       open("t.txt", "w:euc-jp", :undef => :replace) {|f|
         assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
       }
-      open("t.txt", "w:euc-jp", :undef => :ignore) {|f|
+      open("t.txt", "w:euc-jp", :undef => :replace, :replace => "") {|f|
         assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 }
       }
     }
@@ -1375,14 +1375,14 @@
         assert_nothing_raised { f.write "a\uFFFDb" }
       }
       assert_equal("a?b", File.read("t.txt"))
-      open("t.txt", "w:euc-jp:utf-8", :undef => :ignore) {|f|
+      open("t.txt", "w:euc-jp:utf-8", :undef => :replace, :replace => "") {|f|
         assert_nothing_raised { f.write "a\uFFFDb" }
       }
       assert_equal("ab", File.read("t.txt"))
       open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f|
         assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
       }
-      open("t.txt", "w:euc-jp:utf-8", :invalid => :ignore) {|f|
+      open("t.txt", "w:euc-jp:utf-8", :invalid => :replace, :replace => "") {|f|
         assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
       }
     }
@@ -1395,14 +1395,14 @@
         assert_nothing_raised { f.write "a\uFFFDb" }
       }
       assert_equal("a?b", File.read("t.txt"))
-      open("t.txt", "w:iso-2022-jp:utf-8", :undef => :ignore) {|f|
+      open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace, :replace => "") {|f|
         assert_nothing_raised { f.write "a\uFFFDb" }
       }
       assert_equal("ab", File.read("t.txt"))
       open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f|
         assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
       }
-      open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :ignore) {|f|
+      open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace, :replace => "") {|f|
         assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" }
       }
     }
Index: test/ruby/test_econv.rb
===================================================================
--- test/ruby/test_econv.rb	(revision 19122)
+++ test/ruby/test_econv.rb	(revision 19123)
@@ -524,7 +524,7 @@
   end
 
   def test_invalid_ignore
-    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_IGNORE)
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", :invalid => :replace, :replace => "")
     ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100)
     assert_equal(:finished, ret)
     assert_equal("", src)
@@ -540,7 +540,7 @@
   end
 
   def test_undef_ignore
-    ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_IGNORE)
+    ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace, :replace => "")
     ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100)
     assert_equal(:finished, ret)
     assert_equal("", src)
Index: transcode.c
===================================================================
--- transcode.c	(revision 19122)
+++ transcode.c	(revision 19123)
@@ -1286,10 +1286,7 @@
         ret == econv_incomplete_input) {
 	/* deal with invalid byte sequence */
 	/* todo: add more alternative behaviors */
-	if (ec->flags&ECONV_INVALID_IGNORE) {
-            goto resume;
-	}
-	else if (ec->flags&ECONV_INVALID_REPLACE) {
+	if (ec->flags&ECONV_INVALID_REPLACE) {
 	    if (output_replacement_character(ec) == 0)
                 goto resume;
 	}
@@ -1299,10 +1296,7 @@
 	/* valid character in source encoding
 	 * but no related character(s) in destination encoding */
 	/* todo: add more alternative behaviors */
-	if (ec->flags&ECONV_UNDEF_IGNORE) {
-	    goto resume;
-	}
-	else if (ec->flags&ECONV_UNDEF_REPLACE) {
+	if (ec->flags&ECONV_UNDEF_REPLACE) {
 	    if (output_replacement_character(ec) == 0)
                 goto resume;
 	}
@@ -2009,9 +2003,6 @@
     v = rb_hash_aref(opt, sym_invalid);
     if (NIL_P(v)) {
     }
-    else if (v==sym_ignore) {
-        options |= ECONV_INVALID_IGNORE;
-    }
     else if (v==sym_replace) {
         options |= ECONV_INVALID_REPLACE;
         v = rb_hash_aref(opt, sym_replace);
@@ -2022,9 +2013,6 @@
     v = rb_hash_aref(opt, sym_undef);
     if (NIL_P(v)) {
     }
-    else if (v==sym_ignore) {
-        options |= ECONV_UNDEF_IGNORE;
-    }
     else if (v==sym_replace) {
         options |= ECONV_UNDEF_REPLACE;
     }
@@ -3314,10 +3302,8 @@
     rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
     rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);
     rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
-    rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE));
     rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
     rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
-    rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE));
     rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
     rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
     rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT));

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/