ruby-changes:57009
From: Nobuyoshi <ko1@a...>
Date: Tue, 13 Aug 2019 23:42:01 +0900 (JST)
Subject: [ruby-changes:57009] Nobuyoshi Nakada: 5b1bf8dd2d (master): UTF LE is fixed at least the first 2 bytes
https://git.ruby-lang.org/ruby.git/commit/?id=5b1bf8dd2d

From 5b1bf8dd2d08ae7371ecf025967376bb794ed651 Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Tue, 13 Aug 2019 23:23:43 +0900
Subject: UTF LE is fixed at least the first 2 bytes

* io.c (io_strip_bom): if the first 2 bytes are 0xFF0xFE, it
  should be a little-endian UTF, 16 or 32.  [Bug #16099]

diff --git a/io.c b/io.c
index 69dbc93..6a4c5b9 100644
--- a/io.c
+++ b/io.c
@@ -6136,12 +6136,9 @@ io_strip_bom(VALUE io) https://github.com/ruby/ruby/blob/trunk/io.c#L6136
                     return ENCINDEX_UTF_32LE;
                 }
                 rb_io_ungetbyte(io, b4);
-                rb_io_ungetbyte(io, b3);
-            }
-            else {
-                rb_io_ungetbyte(io, b3);
-                return ENCINDEX_UTF_16LE;
             }
+            rb_io_ungetbyte(io, b3);
+            return ENCINDEX_UTF_16LE;
         }
         rb_io_ungetbyte(io, b2);
         break;
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 36c154d..3deab76 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_file.rb#L87
   end
 
   def test_bom_32le
-    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+    assert_bom(["\xFF", "\xFE\0\0"], __method__)
   end
 
   def test_truncate_wbuf
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 630f2ee..8101bfb 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2084,8 +2084,8 @@ EOT https://github.com/ruby/ruby/blob/trunk/test/ruby/test_io_m17n.rb#L2084
     define_method("test_strip_bom:#{name}") do
       path = "#{name}-bom.txt"
       with_tmpdir {
-        text = "\uFEFFa"
-        stripped = "a"
+        text = "\uFEFF\u0100a"
+        stripped = "\u0100a"
         content = text.encode(name)
         generate_file(path, content)
         result = File.read(path, mode: 'rb:BOM|UTF-8')
--
cgit v0.10.2

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/