[前][次][番号順一覧][スレッド一覧]

ruby-changes:7752

From: akr <ko1@a...>
Date: Tue, 9 Sep 2008 23:57:12 +0900 (JST)
Subject: [ruby-changes:7752] Ruby:r19273 (trunk): * io.c (io_fwrite): raise an error if ASCII incompatible string

akr	2008-09-09 23:56:55 +0900 (Tue, 09 Sep 2008)

  New Revision: 19273

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19273

  Log:
    * io.c (io_fwrite): raise an error if ASCII incompatible string
      written for text mode IO without encoding conversion.
      (rb_io_extract_modeenc): binmode requirement changed.

  Modified files:
    trunk/ChangeLog
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 19272)
+++ ChangeLog	(revision 19273)
@@ -1,3 +1,9 @@
+Tue Sep  9 23:55:26 2008  Tanaka Akira  <akr@f...>
+
+	* io.c (io_fwrite): raise an error if ASCII incompatible string
+	  written for text mode IO without encoding conversion.
+	  (rb_io_extract_modeenc): binmode requirement changed.
+
 Tue Sep  9 21:59:48 2008  Takeyuki Fujioka  <xibbar@r...>
 
 	* lib/cgi*: split cgi.rb into four files. [ruby-dev:36041]
Index: io.c
===================================================================
--- io.c	(revision 19272)
+++ io.c	(revision 19273)
@@ -756,6 +756,10 @@
         if (fptr->writeconv) {
             if (!NIL_P(fptr->writeconv_stateless))
                 common_encoding = fptr->writeconv_stateless;
+            else if (!rb_enc_asciicompat(rb_enc_get(str))) {
+                rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s",
+                         rb_enc_name(rb_enc_get(str)));
+            }
         }
         else {
             if (fptr->encs.enc2)
@@ -3908,7 +3912,10 @@
     if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE))
         rb_raise(rb_eArgError, "both textmode and binmode specified");
 
-    if (enc && !rb_enc_asciicompat(enc) && !(fmode & FMODE_BINMODE))
+    if ((fmode & FMODE_READABLE) &&
+        !enc2 &&
+        !(fmode & FMODE_BINMODE) &&
+        !rb_enc_asciicompat(enc ? enc : rb_default_external_encoding()))
         rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
 
     *vmode_p = vmode;
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb	(revision 19272)
+++ test/ruby/test_io_m17n.rb	(revision 19273)
@@ -678,6 +678,7 @@
 
   def test_getc_invalid3
     with_pipe("utf-16le:euc-jp") {|r, w|
+      w.binmode
       before1 = "\x42\x30".force_encoding("utf-16le")
       before2 = "\x44\x30".force_encoding("utf-16le")
       invalid = "\x00\xd8".force_encoding("utf-16le")
@@ -1226,34 +1227,31 @@
     }
   end
 
-  def test_textmode_read_ascii_incompat_internal
+  def test_read_newline_conversion_with_encoding_conversion
     with_tmpdir {
-      # ascii incompatible internal encoding needs binmode.
-      assert_raise(ArgumentError) {
-        open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| }
+      generate_file("t.utf8.crlf", "a\r\nb\r\n")
+      open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f|
+        content = f.read
+        assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content)
       }
-      assert_raise(ArgumentError) {
-        open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| }
+      open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f|
+        content = f.read
+        assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content)
       }
-      assert_raise(ArgumentError) {
-        open("t.utf16.crlf", "rt:utf-16be") {|f| }
+      open("t.utf8.crlf", "r:utf-8:utf-16be") {|f|
+        content = f.read
+        if system_newline == "\n"
+          assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content)
+        else
+          assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content)
+        end
       }
-      assert_raise(ArgumentError) {
-        open("t.utf16.crlf", "r:utf-16be") {|f| }
-      }
     }
   end
 
-  def test_binmode_read_ascii_incompat_internal
+  def test_read_newline_conversion_without_encoding_conversion
     with_tmpdir {
-      generate_file("t.utf8.crlf", "a\r\nb\r\n")
       generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
-      # ascii incompatible internal encoding needs binmode.
-      open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f|
-        content = f.read
-        assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
-                     content)
-      }
       open("t.utf16.crlf", "rb:utf-16be") {|f|
         content = f.read
         assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
@@ -1262,27 +1260,182 @@
     }
   end
 
-  def test_textmode_write_ascii_incompat_internal
+  def test_read_newline_conversion_error
     with_tmpdir {
-      # ascii incompatible internal encoding needs binmode.
+      generate_file("empty.txt", "")
+      # ascii incompatible encoding without conversion needs binmode.
       assert_raise(ArgumentError) {
-        open("t.utf8", "wt:utf-8:utf-16be") {|f| }
+        open("empty.txt", "rt:utf-16be") {|f| }
       }
       assert_raise(ArgumentError) {
-        open("t.utf8", "w:utf-8:utf-16be") {|f| }
+        open("empty.txt", "r:utf-16be") {|f| }
       }
-      assert_raise(ArgumentError) {
-        open("t.utf8", "w:utf-8:utf-16be") {|f| }
+    }
+  end
+
+  def test_read_mode
+    with_tmpdir {
+      generate_file("t", "a\rb\r\nc\n\xc2\xa2")
+      generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
+      generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
+      generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
+      generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")
+      # "\xc2\xa2" is valid as EUC-JP and UTF-8
+      #   EUC-JP        UTF-8           Unicode
+      #   0xC2A2        0xE894B5        U+8535
+      #   0xA1F1        0xC2A2          U+00A2
+
+      open("t","rt") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) }
+      open("t","rb") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) }
+
+      open("t","rt:euc-jp") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
+      open("t","rb:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
+      open("t","rt:utf-8") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
+      open("t","rb:utf-8") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
+      assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } }
+      open("t","rb:iso-2022-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"), f.read) }
+
+      open("t","rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n\u8535", f.read) }
+      open("t","rt:utf-8:euc-jp") {|f| assert_equal("a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }
+      open("t","rb:euc-jp:utf-8") {|f| assert_equal("a\rb\r\nc\n\u8535", f.read) }
+      open("t","rb:utf-8:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }
+
+      open("t","rt:euc-jp:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"), f.read) }
+      open("t","rt:utf-8:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"), f.read) }
+      open("t","rt:euc-jp:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"), f.read) }
+      open("t","rt:utf-8:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"), f.read) }
+      open("t","rb:euc-jp:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
+      open("t","rb:utf-8:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"),f.read)}
+      open("t","rb:euc-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
+      open("t","rb:utf-8:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"),f.read)}
+
+      open("ie","rt:iso-2022-jp:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
+      open("iu","rt:iso-2022-jp:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
+      open("be","rt:utf-16be:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
+      open("bu","rt:utf-16be:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
+      open("ie","rb:iso-2022-jp:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)}
+      open("iu","rb:iso-2022-jp:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)}
+      open("be","rb:utf-16be:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)}
+      open("bu","rb:utf-16be:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)}
+
+      open("ie","rt:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
+      open("be","rt:utf-16be:iso-2022-jp"){|f|assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
+      open("ie","rb:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
+      open("be","rb:utf-16be:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
+    }
+  end
+
+  def assert_write(expected, mode, *args)
+    with_tmpdir {
+      open("t", mode) {|f|
+        args.each {|arg| f.print arg }
       }
-      assert_raise(ArgumentError) {
-        open("t.utf16", "wt:utf-16be") {|f| }
+      content = File.read("t", :mode=>"rb:ascii-8bit")
+      assert_equal(expected.dup.force_encoding("ascii-8bit"),
+                   content.force_encoding("ascii-8bit")) 
+    }
+  end
+
+  def test_write_mode
+    # "\xc2\xa2" is valid as EUC-JP and UTF-8
+    #   EUC-JP        UTF-8           Unicode
+    #   0xC2A2        0xE894B5        U+8535
+    #   0xA1F1        0xC2A2          U+00A2
+    a = "a\rb\r\nc\n"
+    e = "\xc2\xa2".force_encoding("euc-jp")
+    u8 = "\xc2\xa2".force_encoding("utf-8")
+    u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
+    i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
+    n = system_newline
+    un = n.encode("utf-16be").force_encoding("ascii-8bit")
+
+    assert_write("a\rb\r#{n}c#{n}", "wt", a)
+    assert_write("\xc2\xa2", "wt", e)
+    assert_write("\xc2\xa2", "wt", u8)
+
+    assert_write("a\rb\r\nc\n", "wb", a)
+    assert_write("\xc2\xa2", "wb", e)
+    assert_write("\xc2\xa2", "wb", u8)
+
+    #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise
+    #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise
+    assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16)
+    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i)
+
+    t_write_mode_enc
+    t_write_mode_enc(":utf-8")
+  end
+
+  def t_write_mode_enc(enc="")
+    # "\xc2\xa2" is valid as EUC-JP and UTF-8
+    #   EUC-JP        UTF-8           Unicode
+    #   0xC2A2        0xE894B5        U+8535
+    #   0xA1F1        0xC2A2          U+00A2
+    a = "a\rb\r\nc\n"
+    e = "\xc2\xa2".force_encoding("euc-jp")
+    u8 = "\xc2\xa2".force_encoding("utf-8")
+    u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
+    i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
+    n = system_newline
+    un = n.encode("utf-16be").force_encoding("ascii-8bit")
+
+    assert_write("a\rb\r#{n}c#{n}", "wt:euc-jp#{enc}", a)
+    assert_write("\xc2\xa2", "wt:euc-jp#{enc}", e)
+    assert_write("\xa1\xf1", "wt:euc-jp#{enc}", u8)
+
+    assert_write("a\rb\r\nc\n", "wb:euc-jp#{enc}", a)
+    assert_write("\xc2\xa2", "wb:euc-jp#{enc}", e)
+    assert_write("\xa1\xf1", "wb:euc-jp#{enc}", u8)
+
+    assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", u16)
+    assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", i)
+    assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", u16)
+    assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", i)
+
+    assert_write("\0a\0\r\0b\0\r#{un}\0c#{un}", "wt:utf-16be#{enc}", a)
+    assert_write("\x85\x35", "wt:utf-16be#{enc}", e)
+    assert_write("\x00\xa2", "wt:utf-16be#{enc}", u8)
+    assert_write("a\rb\r#{n}c#{n}", "wt:iso-2022-jp#{enc}", a)
+    assert_write("\e$B\x42\x22\e(B", "wt:iso-2022-jp#{enc}", e)
+    assert_write("\e$B\x21\x71\e(B", "wt:iso-2022-jp#{enc}", u8)
+
+    assert_write("\0a\0\r\0b\0\r\0\n\0c\0\n", "wb:utf-16be#{enc}", a)
+    assert_write("\x85\x35", "wb:utf-16be#{enc}", e)
+    assert_write("\x00\xa2", "wb:utf-16be#{enc}", u8)
+    assert_write("a\rb\r\nc\n", "wb:iso-2022-jp#{enc}", a)
+    assert_write("\e$B\x42\x22\e(B", "wb:iso-2022-jp#{enc}", e)
+    assert_write("\e$B\x21\x71\e(B", "wb:iso-2022-jp#{enc}", u8)
+
+    assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", u16)
+    assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", i)
+    assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", u16)
+    assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", i)
+    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", u16)
+    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", i)
+    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", u16)
+    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", i)
+  end
+
+  def test_write_mode_fail
+    return if system_newline == "\n"
+    with_tmpdir {
+      open("t", "wt") {|f|
+        assert_raise(ArgumentError) { f.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be") }
       }
-      assert_raise(ArgumentError) {
-        open("t.utf16", "w:utf-16be") {|f| }
-      }
     }
   end
 
+  def test_write_ascii_incompat
+    with_tmpdir {
+      open("t.utf8", "wb:utf-8:utf-16be") {|f| }
+      open("t.utf8", "wt:utf-8:utf-16be") {|f| }
+      open("t.utf8", "w:utf-8:utf-16be") {|f| }
+      open("t.utf16", "wb:utf-16be") {|f| }
+      open("t.utf16", "wt:utf-16be") {|f| }
+      open("t.utf16", "w:utf-16be") {|f| }
+    }
+  end
+
   def test_binmode_write_ascii_incompat_internal
     with_tmpdir {
       open("t.utf8.lf", "wb:utf-8:utf-16be") {|f|

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]