ruby-changes:56101
From: Nobuyoshi <ko1@a...>
Date: Thu, 13 Jun 2019 18:16:31 +0900 (JST)
Subject: [ruby-changes:56101] Nobuyoshi Nakada: e717d6faa8 (trunk): IO#set_encoding_by_bom
https://git.ruby-lang.org/ruby.git/commit/?id=e717d6faa8 From e717d6faa8463c70407e6aaf116c6b6181f30be6 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada <nobu@r...> Date: Sat, 8 Jun 2019 21:35:33 +0900 Subject: IO#set_encoding_by_bom * io.c (rb_io_set_encoding_by_bom): IO#set_encoding_by_bom to set the encoding by BOM if exists. [Bug #15210] diff --git a/NEWS b/NEWS index c069f9e..4ad38c9 100644 --- a/NEWS +++ b/NEWS @@ -86,6 +86,13 @@ GC:: https://github.com/ruby/ruby/blob/trunk/NEWS#L86 Details on the algorithm and caveats can be found here: https://bugs.ruby-lang.org/issues/15626 +IO:: + + New method:: + + * Added IO#set_encoding_by_bom to check the BOM and set the external + encoding. [Bug #15210] + Integer:: Modified method:: diff --git a/io.c b/io.c index 893cabc..0db981e 100644 --- a/io.c +++ b/io.c @@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io) https://github.com/ruby/ruby/blob/trunk/io.c#L6170 return 0; } -static void +static rb_encoding * io_set_encoding_by_bom(VALUE io) { int idx = io_strip_bom(io); rb_io_t *fptr; + rb_encoding *extenc = NULL; GetOpenFile(io, fptr); if (idx) { - io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)), - rb_io_internal_encoding(io), Qnil); + extenc = rb_enc_from_index(idx); + io_encoding_set(fptr, rb_enc_from_encoding(extenc), + rb_io_internal_encoding(io), Qnil); } else { fptr->encs.enc2 = NULL; } + return extenc; } static VALUE @@ -8308,6 +8311,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io) https://github.com/ruby/ruby/blob/trunk/io.c#L8311 /* * call-seq: + * ios.set_encoding_by_bom -> encoding or nil + * + * Checks if +ios+ starts with a BOM, and then consumes it and sets + * the external encoding. Returns the result encoding if found, or + * nil. If +ios+ is not binmode or its encoding has been set + * already, an exception will be raised. + * + * File.write("bom.txt", "\u{FEFF}abc") + * ios = File.open("bom.txt", "rb") + * ios.set_encoding_by_bom #=> #<Encoding:UTF-8> + * + * File.write("nobom.txt", "abc") + * ios = File.open("nobom.txt", "rb") + * ios.set_encoding_by_bom #=> nil + */ + +static VALUE +rb_io_set_encoding_by_bom(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (!(fptr->mode & FMODE_BINMODE)) { + rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); + } + if (fptr->encs.enc2) { + rb_raise(rb_eArgError, "encoding conversion is set"); + } + if (!io_set_encoding_by_bom(io)) return Qnil; + return rb_enc_from_encoding(fptr->encs.enc); +} + +/* + * call-seq: * File.new(filename, mode="r" [, opt]) -> file * File.new(filename [, mode [, perm]] [, opt]) -> file * @@ -13319,6 +13356,7 @@ Init_IO(void) https://github.com/ruby/ruby/blob/trunk/io.c#L13356 rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0); rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0); rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1); + rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0); rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0); rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1); diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 6fb8d8a..630f2ee 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -2097,6 +2097,10 @@ EOT https://github.com/ruby/ruby/blob/trunk/test/ruby/test_io_m17n.rb#L2097 assert_equal(Encoding::UTF_8, result.encoding, message) assert_equal(stripped, result, message) end + + File.open(path, "rb") {|f| + assert_equal(Encoding.find(name), f.set_encoding_by_bom) + } } end end @@ -2139,6 +2143,10 @@ EOT https://github.com/ruby/ruby/blob/trunk/test/ruby/test_io_m17n.rb#L2143 assert_equal(stripped, result, bug8323) result = File.read(path, encoding: 'BOM|UTF-8:UTF-8') assert_equal(stripped, result, bug8323) + + File.open(path, "rb") {|f| + assert_nil(f.set_encoding_by_bom) + } } end -- cgit v0.10.2 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/