[前][次][番号順一覧][スレッド一覧]

ruby-changes:23731

From: naruse <ko1@a...>
Date: Fri, 25 May 2012 10:39:42 +0900 (JST)
Subject: [ruby-changes:23731] naruse:r35782 (ruby_1_9_3): merge revision(s) 35766:

naruse	2012-05-25 10:39:30 +0900 (Fri, 25 May 2012)

  New Revision: 35782

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=35782

  Log:
    merge revision(s) 35766:
    
    * io.c (io_strip_bom): check EOF.  [Bug #6487][ruby-core:45203]

  Modified files:
    branches/ruby_1_9_3/ChangeLog
    branches/ruby_1_9_3/io.c
    branches/ruby_1_9_3/test/ruby/test_file.rb
    branches/ruby_1_9_3/version.h

Index: ruby_1_9_3/ChangeLog
===================================================================
--- ruby_1_9_3/ChangeLog	(revision 35781)
+++ ruby_1_9_3/ChangeLog	(revision 35782)
@@ -1,3 +1,7 @@
+Fri May 25 10:38:06 2012  Nobuyoshi Nakada  <nobu@r...>
+
+	* io.c (io_strip_bom): check EOF.  [Bug #6487][ruby-core:45203]
+
 Fri May 25 10:36:38 2012  Nobuyoshi Nakada  <nobu@r...>
 
 	* parse.y (f_arglist): should reset lexical states after empty
Index: ruby_1_9_3/io.c
===================================================================
--- ruby_1_9_3/io.c	(revision 35781)
+++ ruby_1_9_3/io.c	(revision 35782)
@@ -4900,65 +4900,63 @@
 static int
 io_strip_bom(VALUE io)
 {
-    int b1, b2, b3, b4;
-    switch (b1 = FIX2INT(rb_io_getbyte(io))) {
-      case 0xEF:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xBB) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0xBF) {
+    VALUE b1, b2, b3, b4;
+
+    if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
+    switch (b1) {
+      case INT2FIX(0xEF):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
+	    if (b3 == INT2FIX(0xBF)) {
 		return rb_utf8_encindex();
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
+	    rb_io_ungetbyte(io, b3);
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0xFE:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xFF) {
+      case INT2FIX(0xFE):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xFF)) {
 	    return rb_enc_find_index("UTF-16BE");
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0xFF:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0xFE) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0) {
-		b4 = FIX2INT(rb_io_getbyte(io));
-		if (b4 == 0) {
+      case INT2FIX(0xFF):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0xFE)) {
+	    b3 = rb_io_getbyte(io);
+	    if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
+		if (b4 == INT2FIX(0)) {
 		    return rb_enc_find_index("UTF-32LE");
 		}
-		rb_io_ungetbyte(io, INT2FIX(b4));
+		rb_io_ungetbyte(io, b4);
+		rb_io_ungetbyte(io, b3);
 	    }
 	    else {
-		rb_io_ungetbyte(io, INT2FIX(b3));
+		rb_io_ungetbyte(io, b3);
 		return rb_enc_find_index("UTF-16LE");
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
 
-      case 0:
-	b2 = FIX2INT(rb_io_getbyte(io));
-	if (b2 == 0) {
-	    b3 = FIX2INT(rb_io_getbyte(io));
-	    if (b3 == 0xFE) {
-		b4 = FIX2INT(rb_io_getbyte(io));
-		if (b4 == 0xFF) {
+      case INT2FIX(0):
+	if (NIL_P(b2 = rb_io_getbyte(io))) break;
+	if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
+	    if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
+		if (b4 == INT2FIX(0xFF)) {
 		    return rb_enc_find_index("UTF-32BE");
 		}
-		rb_io_ungetbyte(io, INT2FIX(b4));
+		rb_io_ungetbyte(io, b4);
 	    }
-	    rb_io_ungetbyte(io, INT2FIX(b3));
+	    rb_io_ungetbyte(io, b3);
 	}
-	rb_io_ungetbyte(io, INT2FIX(b2));
+	rb_io_ungetbyte(io, b2);
 	break;
     }
-    rb_io_ungetbyte(io, INT2FIX(b1));
+    rb_io_ungetbyte(io, b1);
     return 0;
 }
 
Index: ruby_1_9_3/version.h
===================================================================
--- ruby_1_9_3/version.h	(revision 35781)
+++ ruby_1_9_3/version.h	(revision 35782)
@@ -1,5 +1,5 @@
 #define RUBY_VERSION "1.9.3"
-#define RUBY_PATCHLEVEL 227
+#define RUBY_PATCHLEVEL 228
 
 #define RUBY_RELEASE_DATE "2012-05-25"
 #define RUBY_RELEASE_YEAR 2012
Index: ruby_1_9_3/test/ruby/test_file.rb
===================================================================
--- ruby_1_9_3/test/ruby/test_file.rb	(revision 35781)
+++ ruby_1_9_3/test/ruby/test_file.rb	(revision 35782)
@@ -37,6 +37,57 @@
 
   include TestEOF::Seek
 
+  def test_empty_file_bom
+    bug6487 = '[ruby-core:45203]'
+    f = Tempfile.new(__method__.to_s)
+    f.close
+    assert File.exist? f.path
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
+    f.close(true)
+  end
+
+  def assert_bom(bytes, name)
+    bug6487 = '[ruby-core:45203]'
+
+    f = Tempfile.new(name.to_s)
+    f.sync = true
+    expected = ""
+    result = nil
+    bytes[0...-1].each do |x|
+      f.write x
+      f.write ' '
+      f.pos -= 1
+      expected << x
+      assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+      assert_equal("#{expected} ".force_encoding("utf-8"), result)
+    end
+    f.write bytes[-1]
+    assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+    assert_equal '', result, "valid bom"
+    f.close(true)
+  end
+
+  def test_bom_8
+    assert_bom(["\xEF", "\xBB", "\xBF"], __method__)
+  end
+
+  def test_bom_16be
+    assert_bom(["\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_16le
+    assert_bom(["\xFF", "\xFE"], __method__)
+  end
+
+  def test_bom_32be
+    assert_bom(["\0", "\0", "\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_32le
+    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+  end
+
   def test_truncate_wbuf
     f = Tempfile.new("test-truncate")
     f.print "abc"

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]