[前][次][番号順一覧][スレッド一覧]

ruby-changes:23715

From: nobu <ko1@a...>
Date: Thu, 24 May 2012 12:08:01 +0900 (JST)
Subject: [ruby-changes:23715] nobu:r35766 (trunk): Bug #6487

nobu	2012-05-24 12:07:49 +0900 (Thu, 24 May 2012)

  New Revision: 35766

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35766

  Log:
    Bug #6487
    
    * io.c (io_strip_bom): check EOF.  [Bug #6487][ruby-core:45203]

  Modified files:
    trunk/ChangeLog
    trunk/io.c
    trunk/test/ruby/test_file.rb

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 35765)
+++ ChangeLog	(revision 35766)
@@ -1,3 +1,7 @@
+Thu May 24 12:07:46 2012  Nobuyoshi Nakada  <nobu@r...>
+
+	* io.c (io_strip_bom): check EOF.  [Bug #6487][ruby-core:45203]
+
 Wed May 23 22:06:14 2012  NARUSE, Yui  <naruse@r...>
 
 	* lib/net/http/header.rb (Net::HTTPHeader#range): fix broken parser of
Index: io.c
===================================================================
--- io.c	(revision 35765)
+++ io.c	(revision 35766)
@@ -5099,65 +5099,63 @@
 static int
 io_strip_bom(VALUE io)
 {
-    int b1, b2, b3, b4;
-    switch (b1 = FIX2INT(rb_io_getbyte(io))) {
-      case 0xEF:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xBB) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0xBF) {
+    VALUE b1, b2, b3, b4;
+
+    if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
+    switch (b1) {
+      case INT2FIX(0xEF):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
+	    if (b3 == INT2FIX(0xBF)) {
 		return rb_utf8_encindex();
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
+	    rb_io_ungetbyte(io, b3);
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0xFE:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xFF) {
+      case INT2FIX(0xFE):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xFF)) {
 	    return rb_enc_find_index("UTF-16BE");
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0xFF:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xFE) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0) {
-		b4 = FIX2INT(rb_io_getbyte(io));
-		if (b4 == 0) {
+      case INT2FIX(0xFF):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xFE)) {
+	    b3 = rb_io_getbyte(io);
+	    if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
+		if (b4 == INT2FIX(0)) {
 		    return rb_enc_find_index("UTF-32LE");
 		}
-		rb_io_ungetbyte(io, INT2FIX(b4));
+		rb_io_ungetbyte(io, b4);
+		rb_io_ungetbyte(io, b3);
 	    }
 	    else {
-		rb_io_ungetbyte(io, INT2FIX(b3));
+		rb_io_ungetbyte(io, b3);
 		return rb_enc_find_index("UTF-16LE");
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0xFE) {
-		b4 = FIX2INT(rb_io_getbyte(io));
-		if (b4 == 0xFF) {
+      case INT2FIX(0):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
+	    if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
+		if (b4 == INT2FIX(0xFF)) {
 		    return rb_enc_find_index("UTF-32BE");
 		}
-		rb_io_ungetbyte(io, INT2FIX(b4));
+		rb_io_ungetbyte(io, b4);
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
+	    rb_io_ungetbyte(io, b3);
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
     }
-    rb_io_ungetbyte(io, INT2FIX(b1));
+    rb_io_ungetbyte(io, b1);
     return 0;
 }
 
Index: test/ruby/test_file.rb
===================================================================
--- test/ruby/test_file.rb	(revision 35765)
+++ test/ruby/test_file.rb	(revision 35766)
@@ -38,6 +38,57 @@
 
   include TestEOF::Seek
 
+  def test_empty_file_bom
+    bug6487 = '[ruby-core:45203]'
+    f = Tempfile.new(__method__.to_s)
+    f.close
+    assert File.exist? f.path
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
+    f.close(true)
+  end
+
+  def assert_bom(bytes, name)
+    bug6487 = '[ruby-core:45203]'
+
+    f = Tempfile.new(name.to_s)
+    f.sync = true
+    expected = ""
+    result = nil
+    bytes[0...-1].each do |x|
+      f.write x
+      f.write ' '
+      f.pos -= 1
+      expected << x
+      assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+      assert_equal("#{expected} ".force_encoding("utf-8"), result)
+    end
+    f.write bytes[-1]
+    assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+    assert_equal '', result, "valid bom"
+    f.close(true)
+  end
+
+  def test_bom_8
+    assert_bom(["\xEF", "\xBB", "\xBF"], __method__)
+  end
+
+  def test_bom_16be
+    assert_bom(["\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_16le
+    assert_bom(["\xFF", "\xFE"], __method__)
+  end
+
+  def test_bom_32be
+    assert_bom(["\0", "\0", "\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_32le
+    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+  end
+
   def test_truncate_wbuf
     f = Tempfile.new("test-truncate")
     f.print "abc"

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]