[前][次][番号順一覧][スレッド一覧]

ruby-changes:56101

From: Nobuyoshi <ko1@a...>
Date: Thu, 13 Jun 2019 18:16:31 +0900 (JST)
Subject: [ruby-changes:56101] Nobuyoshi Nakada: e717d6faa8 (trunk): IO#set_encoding_by_bom

https://git.ruby-lang.org/ruby.git/commit/?id=e717d6faa8

From e717d6faa8463c70407e6aaf116c6b6181f30be6 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Sat, 8 Jun 2019 21:35:33 +0900
Subject: IO#set_encoding_by_bom

* io.c (rb_io_set_encoding_by_bom): add IO#set_encoding_by_bom to
  set the external encoding from the BOM, if one exists.  [Bug #15210]

diff --git a/NEWS b/NEWS
index c069f9e..4ad38c9 100644
--- a/NEWS
+++ b/NEWS
@@ -86,6 +86,13 @@ GC:: https://github.com/ruby/ruby/blob/trunk/NEWS#L86
       Details on the algorithm and caveats can be found here:
       https://bugs.ruby-lang.org/issues/15626
 
+IO::
+
+  New method::
+
+    * Added IO#set_encoding_by_bom to check the BOM and set the external
+      encoding.  [Bug #15210]
+
 Integer::
 
   Modified method::
diff --git a/io.c b/io.c
index 893cabc..0db981e 100644
--- a/io.c
+++ b/io.c
@@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io) https://github.com/ruby/ruby/blob/trunk/io.c#L6170
     return 0;
 }
 
-static void
+static rb_encoding *
 io_set_encoding_by_bom(VALUE io)
 {
     int idx = io_strip_bom(io);
     rb_io_t *fptr;
+    rb_encoding *extenc = NULL;
 
     GetOpenFile(io, fptr);
     if (idx) {
-	io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
-		rb_io_internal_encoding(io), Qnil);
+        extenc = rb_enc_from_index(idx);
+        io_encoding_set(fptr, rb_enc_from_encoding(extenc),
+                        rb_io_internal_encoding(io), Qnil);
     }
     else {
 	fptr->encs.enc2 = NULL;
     }
+    return extenc;
 }
 
 static VALUE
@@ -8308,6 +8311,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io) https://github.com/ruby/ruby/blob/trunk/io.c#L8311
 
 /*
  *  call-seq:
+ *     ios.set_encoding_by_bom   -> encoding or nil
+ *
+ *  Checks if +ios+ starts with a BOM, and if so consumes it and sets
+ *  the external encoding.  Returns the resulting encoding if a BOM is
+ *  found, or nil otherwise.  If +ios+ is not in binmode or its encoding
+ *  has already been set, an exception will be raised.
+ *
+ *    File.write("bom.txt", "\u{FEFF}abc")
+ *    ios = File.open("bom.txt", "rb")
+ *    ios.set_encoding_by_bom    #=>  #<Encoding:UTF-8>
+ *
+ *    File.write("nobom.txt", "abc")
+ *    ios = File.open("nobom.txt", "rb")
+ *    ios.set_encoding_by_bom    #=>  nil
+ */
+
+static VALUE
+rb_io_set_encoding_by_bom(VALUE io)
+{
+    rb_io_t *fptr;
+
+    GetOpenFile(io, fptr);
+    if (!(fptr->mode & FMODE_BINMODE)) {
+        rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
+    }
+    if (fptr->encs.enc2) {
+        rb_raise(rb_eArgError, "encoding conversion is set");
+    }
+    if (!io_set_encoding_by_bom(io)) return Qnil;
+    return rb_enc_from_encoding(fptr->encs.enc);
+}
+
+/*
+ *  call-seq:
  *     File.new(filename, mode="r" [, opt])            -> file
  *     File.new(filename [, mode [, perm]] [, opt])    -> file
  *
@@ -13319,6 +13356,7 @@ Init_IO(void) https://github.com/ruby/ruby/blob/trunk/io.c#L13356
     rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
     rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
     rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
+    rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);
 
     rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
     rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 6fb8d8a..630f2ee 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2097,6 +2097,10 @@ EOT https://github.com/ruby/ruby/blob/trunk/test/ruby/test_io_m17n.rb#L2097
           assert_equal(Encoding::UTF_8, result.encoding, message)
           assert_equal(stripped, result, message)
         end
+
+        File.open(path, "rb") {|f|
+          assert_equal(Encoding.find(name), f.set_encoding_by_bom)
+        }
       }
     end
   end
@@ -2139,6 +2143,10 @@ EOT https://github.com/ruby/ruby/blob/trunk/test/ruby/test_io_m17n.rb#L2143
       assert_equal(stripped, result, bug8323)
       result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
       assert_equal(stripped, result, bug8323)
+
+      File.open(path, "rb") {|f|
+        assert_nil(f.set_encoding_by_bom)
+      }
     }
   end
 
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]