ruby-changes:23731
From: naruse <ko1@a...>
Date: Fri, 25 May 2012 10:39:42 +0900 (JST)
Subject: [ruby-changes:23731] naruse:r35782 (ruby_1_9_3): merge revision(s) 35766:
naruse 2012-05-25 10:39:30 +0900 (Fri, 25 May 2012)

  New Revision: 35782

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35782

  Log:
    merge revision(s) 35766:

    * io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]

  Modified files:
    branches/ruby_1_9_3/ChangeLog
    branches/ruby_1_9_3/io.c
    branches/ruby_1_9_3/test/ruby/test_file.rb
    branches/ruby_1_9_3/version.h

Index: ruby_1_9_3/ChangeLog
===================================================================
--- ruby_1_9_3/ChangeLog (revision 35781)
+++ ruby_1_9_3/ChangeLog (revision 35782)
@@ -1,3 +1,7 @@
+Fri May 25 10:38:06 2012 Nobuyoshi Nakada <nobu@r...>
+
+        * io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]
+
 Fri May 25 10:36:38 2012 Nobuyoshi Nakada <nobu@r...>
 
         * parse.y (f_arglist): should reset lexical states after empty
Index: ruby_1_9_3/io.c
===================================================================
--- ruby_1_9_3/io.c (revision 35781)
+++ ruby_1_9_3/io.c (revision 35782)
@@ -4900,65 +4900,63 @@
 static int
 io_strip_bom(VALUE io)
 {
-    int b1, b2, b3, b4;
-    switch (b1 = FIX2INT(rb_io_getbyte(io))) {
-      case 0xEF:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xBB) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0xBF) {
+    VALUE b1, b2, b3, b4;
+
+    if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
+    switch (b1) {
+      case INT2FIX(0xEF):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
+            if (b3 == INT2FIX(0xBF)) {
                 return rb_utf8_encindex();
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
+            rb_io_ungetbyte(io, b3);
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0xFE:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xFF) {
+      case INT2FIX(0xFE):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xFF)) {
             return rb_enc_find_index("UTF-16BE");
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0xFF:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xFE) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0) {
-                b4 = FIX2INT(rb_io_getbyte(io));
-                if (b4 == 0) {
+      case INT2FIX(0xFF):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xFE)) {
+            b3 = rb_io_getbyte(io);
+            if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
+                if (b4 == INT2FIX(0)) {
                     return rb_enc_find_index("UTF-32LE");
                 }
-                rb_io_ungetbyte(io, INT2FIX(b4));
+                rb_io_ungetbyte(io, b4);
+                rb_io_ungetbyte(io, b3);
             }
             else {
-                rb_io_ungetbyte(io, INT2FIX(b3));
+                rb_io_ungetbyte(io, b3);
                 return rb_enc_find_index("UTF-16LE");
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0xFE) {
-                b4 = FIX2INT(rb_io_getbyte(io));
-                if (b4 == 0xFF) {
+      case INT2FIX(0):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
+            if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
+                if (b4 == INT2FIX(0xFF)) {
                     return rb_enc_find_index("UTF-32BE");
                 }
-                rb_io_ungetbyte(io, INT2FIX(b4));
+                rb_io_ungetbyte(io, b4);
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
+            rb_io_ungetbyte(io, b3);
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
     }
-    rb_io_ungetbyte(io, INT2FIX(b1));
+    rb_io_ungetbyte(io, b1);
     return 0;
 }
 
Index: ruby_1_9_3/version.h
===================================================================
--- ruby_1_9_3/version.h (revision 35781)
+++ ruby_1_9_3/version.h (revision 35782)
@@ -1,5 +1,5 @@
 #define RUBY_VERSION "1.9.3"
-#define RUBY_PATCHLEVEL 227
+#define RUBY_PATCHLEVEL 228
 
 #define RUBY_RELEASE_DATE "2012-05-25"
 #define RUBY_RELEASE_YEAR 2012
Index: ruby_1_9_3/test/ruby/test_file.rb
===================================================================
--- ruby_1_9_3/test/ruby/test_file.rb (revision 35781)
+++ ruby_1_9_3/test/ruby/test_file.rb (revision 35782)
@@ -37,6 +37,57 @@
 
   include TestEOF::Seek
 
+  def test_empty_file_bom
+    bug6487 = '[ruby-core:45203]'
+    f = Tempfile.new(__method__.to_s)
+    f.close
+    assert File.exist? f.path
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
+    f.close(true)
+  end
+
+  def assert_bom(bytes, name)
+    bug6487 = '[ruby-core:45203]'
+
+    f = Tempfile.new(name.to_s)
+    f.sync = true
+    expected = ""
+    result = nil
+    bytes[0...-1].each do |x|
+      f.write x
+      f.write ' '
+      f.pos -= 1
+      expected << x
+      assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+      assert_equal("#{expected} ".force_encoding("utf-8"), result)
+    end
+    f.write bytes[-1]
+    assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+    assert_equal '', result, "valid bom"
+    f.close(true)
+  end
+
+  def test_bom_8
+    assert_bom(["\xEF", "\xBB", "\xBF"], __method__)
+  end
+
+  def test_bom_16be
+    assert_bom(["\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_16le
+    assert_bom(["\xFF", "\xFE"], __method__)
+  end
+
+  def test_bom_32be
+    assert_bom(["\0", "\0", "\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_32le
+    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+  end
+
   def test_truncate_wbuf
     f = Tempfile.new("test-truncate")
     f.print "abc"

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/