ruby-changes:40752
From: usa <ko1@a...>
Date: Tue, 1 Dec 2015 17:01:19 +0900 (JST)
Subject: [ruby-changes:40752] usa:r52831 (ruby_2_1): merge revision(s) 51583, 51594, 51638: [Backport #11444]
usa 2015-12-01 17:00:58 +0900 (Tue, 01 Dec 2015) New Revision: 52831 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=52831 Log: merge revision(s) 51583,51594,51638: [Backport #11444] * io.c (rb_io_each_codepoint): read more data when read partially. [ruby-core:70379] [Bug #11444] * io.c (rb_io_each_codepoint): raise an exception at incomplete character before EOF when conversion takes place. [Bug #11444] Modified directories: branches/ruby_2_1/ Modified files: branches/ruby_2_1/ChangeLog branches/ruby_2_1/NEWS branches/ruby_2_1/io.c branches/ruby_2_1/test/ruby/test_io_m17n.rb branches/ruby_2_1/version.h Index: ruby_2_1/ChangeLog =================================================================== --- ruby_2_1/ChangeLog (revision 52830) +++ ruby_2_1/ChangeLog (revision 52831) @@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/ChangeLog#L1 +Tue Dec 1 16:57:57 2015 Nobuyoshi Nakada <nobu@r...> + + * io.c (rb_io_each_codepoint): raise an exception at incomplete + character before EOF when conversion takes place. [Bug #11444] + +Tue Dec 1 16:57:57 2015 Nobuyoshi Nakada <nobu@r...> + + * io.c (rb_io_each_codepoint): read more data when read partially. + [ruby-core:70379] [Bug #11444] + Tue Dec 1 13:06:29 2015 NAKAMURA Usaku <usa@r...> * ext/digest/sha1/sha1ossl.c: fixed build error introduced at r52797. Index: ruby_2_1/io.c =================================================================== --- ruby_2_1/io.c (revision 52830) +++ ruby_2_1/io.c (revision 52831) @@ -3630,6 +3630,7 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3630 READ_CHECK(fptr); if (NEED_READCONV(fptr)) { SET_BINARY_MODE(fptr); + r = 1; /* no invalid char yet */ for (;;) { make_readconv(fptr, 0); for (;;) { @@ -3648,13 +3649,16 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3649 } if (more_char(fptr) == MORE_CHAR_FINISHED) { clear_readconv(fptr); - /* ignore an incomplete character before EOF */ + if (!MBCLEN_CHARFOUND_P(r)) { + enc = fptr->encs.enc; + goto invalid; + } return io; } } if (MBCLEN_INVALID_P(r)) { - rb_raise(rb_eArgError, "invalid byte sequence in %s", - rb_enc_name(fptr->encs.enc)); + enc = fptr->encs.enc; + goto invalid; } n = MBCLEN_CHARFOUND_LEN(r); if (fptr->encs.enc) { @@ -3684,8 +3688,25 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3688 rb_yield(UINT2NUM(c)); } else if (MBCLEN_INVALID_P(r)) { + invalid: rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc)); } + else if (MBCLEN_NEEDMORE_P(r)) { + char cbuf[8], *p = cbuf; + int more = MBCLEN_NEEDMORE_LEN(r); + if (more > numberof(cbuf)) goto invalid; + more += n = fptr->rbuf.len; + if (more > numberof(cbuf)) goto invalid; + while ((n = (int)read_buffered_data(p, more, fptr)) > 0 && + (p += n, (more -= n) > 0)) { + if (io_fillbuf(fptr) < 0) goto invalid; + if ((n = fptr->rbuf.len) > more) n = more; + } + r = rb_enc_precise_mbclen(cbuf, p, enc); + if (!MBCLEN_CHARFOUND_P(r)) goto invalid; + c = rb_enc_codepoint(cbuf, p, enc); + rb_yield(UINT2NUM(c)); + } else { continue; } Index: ruby_2_1/NEWS =================================================================== --- ruby_2_1/NEWS (revision 52830) +++ ruby_2_1/NEWS (revision 52831) @@ -148,6 +148,8 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/ruby_2_1/NEWS#L148 * IO * incompatible changes: * open ignore internal encoding if external encoding is ASCII-8BIT. + * IO#each_codepoint raises an exception at incomplete character + before EOF when conversion takes place. [Bug #11444] * Kernel#eval, Kernel#instance_eval, and Module#module_eval. * Copies the scope information of the original environment, which means Index: ruby_2_1/version.h =================================================================== --- ruby_2_1/version.h (revision 52830) +++ ruby_2_1/version.h (revision 52831) @@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/version.h#L1 #define RUBY_VERSION "2.1.8" #define RUBY_RELEASE_DATE "2015-12-01" -#define RUBY_PATCHLEVEL 429 +#define RUBY_PATCHLEVEL 430 #define RUBY_RELEASE_YEAR 2015 #define RUBY_RELEASE_MONTH 12 Index: ruby_2_1/test/ruby/test_io_m17n.rb =================================================================== --- ruby_2_1/test/ruby/test_io_m17n.rb (revision 52830) +++ ruby_2_1/test/ruby/test_io_m17n.rb (revision 52831) @@ -1,6 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/test/ruby/test_io_m17n.rb#L1 # coding: US-ASCII require 'test/unit' require 'tmpdir' +require 'tempfile' require 'timeout' require_relative 'envutil' @@ -2535,4 +2536,42 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_1/test/ruby/test_io_m17n.rb#L2536 end } end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_each_codepoint_need_more + bug11444 = '[ruby-core:70379] [Bug #11444]' + tests = [ + ["incomplete multibyte", "\u{1f376}".b[0,3], [], ["invalid byte sequence in UTF-8"]], + ["multibyte at boundary", "x"*8190+"\u{1f376}", ["1f376"], []], + ] + failure = [] + ["bin", "text"].product(tests) do |mode, (test, data, out, err)| + code = <<-"end;" + c = nil + begin + open(ARGV[0], "r#{mode[0]}:utf-8") do |f| + f.each_codepoint{|i| c = i} + end + rescue ArgumentError => e + STDERR.puts e.message + else + printf "%x", c + end + end; + Tempfile.create("codepoint") do |f| + args = ['-e', code, f.path] + f.print data + f.close + begin + assert_in_out_err(args, "", out, err, + "#{bug11444}: #{test} in #{mode} mode", + timeout: 1) + rescue Exception => e + failure << e + end + end + end + unless failure.empty? + flunk failure.join("\n---\n") + end + end end Property changes on: ruby_2_1 ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk:r51583,51594,51638 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/