ruby-changes:7752
From: akr <ko1@a...>
Date: Tue, 9 Sep 2008 23:57:12 +0900 (JST)
Subject: [ruby-changes:7752] Ruby:r19273 (trunk): * io.c (io_fwrite): raise an error if ASCII incompatible string
akr 2008-09-09 23:56:55 +0900 (Tue, 09 Sep 2008)

  New Revision: 19273

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19273

  Log:
    * io.c (io_fwrite): raise an error if ASCII incompatible string
      written for text mode IO without encoding conversion.
      (rb_io_extract_modeenc): binmode requirement changed.

  Modified files:
    trunk/ChangeLog
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: ChangeLog
===================================================================
--- ChangeLog (revision 19272)
+++ ChangeLog (revision 19273)
@@ -1,3 +1,9 @@
+Tue Sep  9 23:55:26 2008  Tanaka Akira  <akr@f...>
+
+        * io.c (io_fwrite): raise an error if ASCII incompatible string
+          written for text mode IO without encoding conversion.
+          (rb_io_extract_modeenc): binmode requirement changed.
+
 Tue Sep  9 21:59:48 2008  Takeyuki Fujioka  <xibbar@r...>
 
         * lib/cgi*: split cgi.rb into four files. [ruby-dev:36041]
Index: io.c
===================================================================
--- io.c (revision 19272)
+++ io.c (revision 19273)
@@ -756,6 +756,10 @@
     if (fptr->writeconv) {
         if (!NIL_P(fptr->writeconv_stateless))
             common_encoding = fptr->writeconv_stateless;
+        else if (!rb_enc_asciicompat(rb_enc_get(str))) {
+            rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s",
+                     rb_enc_name(rb_enc_get(str)));
+        }
     }
     else {
         if (fptr->encs.enc2)
@@ -3908,7 +3912,10 @@
     if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE))
         rb_raise(rb_eArgError, "both textmode and binmode specified");
 
-    if (enc && !rb_enc_asciicompat(enc) && !(fmode & FMODE_BINMODE))
+    if ((fmode & FMODE_READABLE) &&
+        !enc2 &&
+        !(fmode & FMODE_BINMODE) &&
+        !rb_enc_asciicompat(enc ? 
enc : rb_default_external_encoding())) rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); *vmode_p = vmode; Index: test/ruby/test_io_m17n.rb =================================================================== --- test/ruby/test_io_m17n.rb (revision 19272) +++ test/ruby/test_io_m17n.rb (revision 19273) @@ -678,6 +678,7 @@ def test_getc_invalid3 with_pipe("utf-16le:euc-jp") {|r, w| + w.binmode before1 = "\x42\x30".force_encoding("utf-16le") before2 = "\x44\x30".force_encoding("utf-16le") invalid = "\x00\xd8".force_encoding("utf-16le") @@ -1226,34 +1227,31 @@ } end - def test_textmode_read_ascii_incompat_internal + def test_read_newline_conversion_with_encoding_conversion with_tmpdir { - # ascii incompatible internal encoding needs binmode. - assert_raise(ArgumentError) { - open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| } + generate_file("t.utf8.crlf", "a\r\nb\r\n") + open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f| + content = f.read + assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content) } - assert_raise(ArgumentError) { - open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| } + open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| + content = f.read + assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content) } - assert_raise(ArgumentError) { - open("t.utf16.crlf", "rt:utf-16be") {|f| } + open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| + content = f.read + if system_newline == "\n" + assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content) + else + assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content) + end } - assert_raise(ArgumentError) { - open("t.utf16.crlf", "r:utf-16be") {|f| } - } } end - def test_binmode_read_ascii_incompat_internal + def test_read_newline_conversion_without_encoding_conversion with_tmpdir { - generate_file("t.utf8.crlf", "a\r\nb\r\n") generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n") - # ascii incompatible internal encoding needs binmode. 
- open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f| - content = f.read - assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), - content) - } open("t.utf16.crlf", "rb:utf-16be") {|f| content = f.read assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), @@ -1262,27 +1260,182 @@ } end - def test_textmode_write_ascii_incompat_internal + def test_read_newline_conversion_error with_tmpdir { - # ascii incompatible internal encoding needs binmode. + generate_file("empty.txt", "") + # ascii incompatible encoding without conversion needs binmode. assert_raise(ArgumentError) { - open("t.utf8", "wt:utf-8:utf-16be") {|f| } + open("empty.txt", "rt:utf-16be") {|f| } } assert_raise(ArgumentError) { - open("t.utf8", "w:utf-8:utf-16be") {|f| } + open("empty.txt", "r:utf-16be") {|f| } } - assert_raise(ArgumentError) { - open("t.utf8", "w:utf-8:utf-16be") {|f| } + } + end + + def test_read_mode + with_tmpdir { + generate_file("t", "a\rb\r\nc\n\xc2\xa2") + generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B") + generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B") + generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35") + generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2") + # "\xc2\xa2" is valid as EUC-JP and UTF-8 + # EUC-JP UTF-8 Unicode + # 0xC2A2 0xE894B5 U+8535 + # 0xA1F1 0xC2A2 U+00A2 + + open("t","rt") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) } + open("t","rb") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) } + + open("t","rt:euc-jp") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } + open("t","rb:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } + open("t","rt:utf-8") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } + open("t","rb:utf-8") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } + assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } } + 
open("t","rb:iso-2022-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"), f.read) } + + open("t","rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n\u8535", f.read) } + open("t","rt:utf-8:euc-jp") {|f| assert_equal("a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) } + open("t","rb:euc-jp:utf-8") {|f| assert_equal("a\rb\r\nc\n\u8535", f.read) } + open("t","rb:utf-8:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) } + + open("t","rt:euc-jp:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"), f.read) } + open("t","rt:utf-8:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"), f.read) } + open("t","rt:euc-jp:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"), f.read) } + open("t","rt:utf-8:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"), f.read) } + open("t","rb:euc-jp:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)} + open("t","rb:utf-8:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"),f.read)} + open("t","rb:euc-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)} + open("t","rb:utf-8:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"),f.read)} + + open("ie","rt:iso-2022-jp:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } + open("iu","rt:iso-2022-jp:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } + open("be","rt:utf-16be:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } + open("bu","rt:utf-16be:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } + open("ie","rb:iso-2022-jp:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)} + 
open("iu","rb:iso-2022-jp:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)} + open("be","rb:utf-16be:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)} + open("bu","rb:utf-16be:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)} + + open("ie","rt:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)} + open("be","rt:utf-16be:iso-2022-jp"){|f|assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)} + open("ie","rb:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)} + open("be","rb:utf-16be:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)} + } + end + + def assert_write(expected, mode, *args) + with_tmpdir { + open("t", mode) {|f| + args.each {|arg| f.print arg } } - assert_raise(ArgumentError) { - open("t.utf16", "wt:utf-16be") {|f| } + content = File.read("t", :mode=>"rb:ascii-8bit") + assert_equal(expected.dup.force_encoding("ascii-8bit"), + content.force_encoding("ascii-8bit")) + } + end + + def test_write_mode + # "\xc2\xa2" is valid as EUC-JP and UTF-8 + # EUC-JP UTF-8 Unicode + # 0xC2A2 0xE894B5 U+8535 + # 0xA1F1 0xC2A2 U+00A2 + a = "a\rb\r\nc\n" + e = "\xc2\xa2".force_encoding("euc-jp") + u8 = "\xc2\xa2".force_encoding("utf-8") + u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be") + i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp") + n = system_newline + un = n.encode("utf-16be").force_encoding("ascii-8bit") + + assert_write("a\rb\r#{n}c#{n}", "wt", a) + assert_write("\xc2\xa2", "wt", e) + assert_write("\xc2\xa2", "wt", u8) + + assert_write("a\rb\r\nc\n", "wb", a) + assert_write("\xc2\xa2", "wb", e) + assert_write("\xc2\xa2", "wb", u8) + + #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise + 
#assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise + assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16) + assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i) + + t_write_mode_enc + t_write_mode_enc(":utf-8") + end + + def t_write_mode_enc(enc="") + # "\xc2\xa2" is valid as EUC-JP and UTF-8 + # EUC-JP UTF-8 Unicode + # 0xC2A2 0xE894B5 U+8535 + # 0xA1F1 0xC2A2 U+00A2 + a = "a\rb\r\nc\n" + e = "\xc2\xa2".force_encoding("euc-jp") + u8 = "\xc2\xa2".force_encoding("utf-8") + u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be") + i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp") + n = system_newline + un = n.encode("utf-16be").force_encoding("ascii-8bit") + + assert_write("a\rb\r#{n}c#{n}", "wt:euc-jp#{enc}", a) + assert_write("\xc2\xa2", "wt:euc-jp#{enc}", e) + assert_write("\xa1\xf1", "wt:euc-jp#{enc}", u8) + + assert_write("a\rb\r\nc\n", "wb:euc-jp#{enc}", a) + assert_write("\xc2\xa2", "wb:euc-jp#{enc}", e) + assert_write("\xa1\xf1", "wb:euc-jp#{enc}", u8) + + assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", u16) + assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", i) + assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", u16) + assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", i) + + assert_write("\0a\0\r\0b\0\r#{un}\0c#{un}", "wt:utf-16be#{enc}", a) + assert_write("\x85\x35", "wt:utf-16be#{enc}", e) + assert_write("\x00\xa2", "wt:utf-16be#{enc}", u8) + assert_write("a\rb\r#{n}c#{n}", "wt:iso-2022-jp#{enc}", a) + assert_write("\e$B\x42\x22\e(B", "wt:iso-2022-jp#{enc}", e) + assert_write("\e$B\x21\x71\e(B", "wt:iso-2022-jp#{enc}", u8) + + assert_write("\0a\0\r\0b\0\r\0\n\0c\0\n", "wb:utf-16be#{enc}", a) + assert_write("\x85\x35", "wb:utf-16be#{enc}", e) + assert_write("\x00\xa2", "wb:utf-16be#{enc}", u8) + assert_write("a\rb\r\nc\n", "wb:iso-2022-jp#{enc}", a) + assert_write("\e$B\x42\x22\e(B", "wb:iso-2022-jp#{enc}", e) + 
assert_write("\e$B\x21\x71\e(B", "wb:iso-2022-jp#{enc}", u8) + + assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", u16) + assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", i) + assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", u16) + assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", i) + assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", u16) + assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", i) + assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", u16) + assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", i) + end + + def test_write_mode_fail + return if system_newline == "\n" + with_tmpdir { + open("t", "wt") {|f| + assert_raise(ArgumentError) { f.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be") } } - assert_raise(ArgumentError) { - open("t.utf16", "w:utf-16be") {|f| } - } } end + def test_write_ascii_incompat + with_tmpdir { + open("t.utf8", "wb:utf-8:utf-16be") {|f| } + open("t.utf8", "wt:utf-8:utf-16be") {|f| } + open("t.utf8", "w:utf-8:utf-16be") {|f| } + open("t.utf16", "wb:utf-16be") {|f| } + open("t.utf16", "wt:utf-16be") {|f| } + open("t.utf16", "w:utf-16be") {|f| } + } + end + def test_binmode_write_ascii_incompat_internal with_tmpdir { open("t.utf8.lf", "wb:utf-8:utf-16be") {|f| -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/