[前][次][番号順一覧][スレッド一覧]

ruby-changes:40752

From: usa <ko1@a...>
Date: Tue, 1 Dec 2015 17:01:19 +0900 (JST)
Subject: [ruby-changes:40752] usa:r52831 (ruby_2_1): merge revision(s) 51583, 51594, 51638: [Backport #11444]

usa	2015-12-01 17:00:58 +0900 (Tue, 01 Dec 2015)

  New Revision: 52831

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=52831

  Log:
    merge revision(s) 51583,51594,51638: [Backport #11444]
    
    * io.c (rb_io_each_codepoint): read more data when read partially.
      [ruby-core:70379] [Bug #11444]
    
    * io.c (rb_io_each_codepoint): raise an exception at incomplete
      character before EOF when conversion takes place.  [Bug #11444]

  Modified directories:
    branches/ruby_2_1/
  Modified files:
    branches/ruby_2_1/ChangeLog
    branches/ruby_2_1/NEWS
    branches/ruby_2_1/io.c
    branches/ruby_2_1/test/ruby/test_io_m17n.rb
    branches/ruby_2_1/version.h
Index: ruby_2_1/ChangeLog
===================================================================
--- ruby_2_1/ChangeLog	(revision 52830)
+++ ruby_2_1/ChangeLog	(revision 52831)
@@ -1,3 +1,13 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/ChangeLog#L1
+Tue Dec  1 16:57:57 2015  Nobuyoshi Nakada  <nobu@r...>
+
+	* io.c (rb_io_each_codepoint): raise an exception at incomplete
+	  character before EOF when conversion takes place.  [Bug #11444]
+
+Tue Dec  1 16:57:57 2015  Nobuyoshi Nakada  <nobu@r...>
+
+	* io.c (rb_io_each_codepoint): read more data when read partially.
+	  [ruby-core:70379] [Bug #11444]
+
 Tue Dec  1 13:06:29 2015  NAKAMURA Usaku  <usa@r...>
 
 	* ext/digest/sha1/sha1ossl.c: fixed build error introduced at r52797.
Index: ruby_2_1/io.c
===================================================================
--- ruby_2_1/io.c	(revision 52830)
+++ ruby_2_1/io.c	(revision 52831)
@@ -3630,6 +3630,7 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3630
     READ_CHECK(fptr);
     if (NEED_READCONV(fptr)) {
 	SET_BINARY_MODE(fptr);
+	r = 1;		/* no invalid char yet */
 	for (;;) {
 	    make_readconv(fptr, 0);
 	    for (;;) {
@@ -3648,13 +3649,16 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3649
 		}
 		if (more_char(fptr) == MORE_CHAR_FINISHED) {
                     clear_readconv(fptr);
-		    /* ignore an incomplete character before EOF */
+		    if (!MBCLEN_CHARFOUND_P(r)) {
+			enc = fptr->encs.enc;
+			goto invalid;
+		    }
 		    return io;
 		}
 	    }
 	    if (MBCLEN_INVALID_P(r)) {
-		rb_raise(rb_eArgError, "invalid byte sequence in %s",
-			 rb_enc_name(fptr->encs.enc));
+		enc = fptr->encs.enc;
+		goto invalid;
 	    }
 	    n = MBCLEN_CHARFOUND_LEN(r);
 	    if (fptr->encs.enc) {
@@ -3684,8 +3688,25 @@ rb_io_each_codepoint(VALUE io) https://github.com/ruby/ruby/blob/trunk/ruby_2_1/io.c#L3688
 	    rb_yield(UINT2NUM(c));
 	}
 	else if (MBCLEN_INVALID_P(r)) {
+	  invalid:
 	    rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
 	}
+	else if (MBCLEN_NEEDMORE_P(r)) {
+	    char cbuf[8], *p = cbuf;
+	    int more = MBCLEN_NEEDMORE_LEN(r);
+	    if (more > numberof(cbuf)) goto invalid;
+	    more += n = fptr->rbuf.len;
+	    if (more > numberof(cbuf)) goto invalid;
+	    while ((n = (int)read_buffered_data(p, more, fptr)) > 0 &&
+		   (p += n, (more -= n) > 0)) {
+		if (io_fillbuf(fptr) < 0) goto invalid;
+		if ((n = fptr->rbuf.len) > more) n = more;
+	    }
+	    r = rb_enc_precise_mbclen(cbuf, p, enc);
+	    if (!MBCLEN_CHARFOUND_P(r)) goto invalid;
+	    c = rb_enc_codepoint(cbuf, p, enc);
+	    rb_yield(UINT2NUM(c));
+	}
 	else {
 	    continue;
 	}
Index: ruby_2_1/NEWS
===================================================================
--- ruby_2_1/NEWS	(revision 52830)
+++ ruby_2_1/NEWS	(revision 52831)
@@ -148,6 +148,8 @@ with all sufficient information, see the https://github.com/ruby/ruby/blob/trunk/ruby_2_1/NEWS#L148
 * IO
   * incompatible changes:
     * open ignore internal encoding if external encoding is ASCII-8BIT.
+  * IO#each_codepoint raises an exception at incomplete character
+    before EOF when conversion takes place.  [Bug #11444]
 
 * Kernel#eval, Kernel#instance_eval, and Module#module_eval.
   * Copies the scope information of the original environment, which means
Index: ruby_2_1/version.h
===================================================================
--- ruby_2_1/version.h	(revision 52830)
+++ ruby_2_1/version.h	(revision 52831)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/version.h#L1
 #define RUBY_VERSION "2.1.8"
 #define RUBY_RELEASE_DATE "2015-12-01"
-#define RUBY_PATCHLEVEL 429
+#define RUBY_PATCHLEVEL 430
 
 #define RUBY_RELEASE_YEAR 2015
 #define RUBY_RELEASE_MONTH 12
Index: ruby_2_1/test/ruby/test_io_m17n.rb
===================================================================
--- ruby_2_1/test/ruby/test_io_m17n.rb	(revision 52830)
+++ ruby_2_1/test/ruby/test_io_m17n.rb	(revision 52831)
@@ -1,6 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/test/ruby/test_io_m17n.rb#L1
 # coding: US-ASCII
 require 'test/unit'
 require 'tmpdir'
+require 'tempfile'
 require 'timeout'
 require_relative 'envutil'
 
@@ -2535,4 +2536,42 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_1/test/ruby/test_io_m17n.rb#L2536
       end
     }
   end if /mswin|mingw/ =~ RUBY_PLATFORM
+
+  def test_each_codepoint_need_more
+    bug11444 = '[ruby-core:70379] [Bug #11444]'
+    tests = [
+      ["incomplete multibyte", "\u{1f376}".b[0,3], [], ["invalid byte sequence in UTF-8"]],
+      ["multibyte at boundary", "x"*8190+"\u{1f376}", ["1f376"], []],
+    ]
+    failure = []
+    ["bin", "text"].product(tests) do |mode, (test, data, out, err)|
+      code = <<-"end;"
+        c = nil
+        begin
+          open(ARGV[0], "r#{mode[0]}:utf-8") do |f|
+            f.each_codepoint{|i| c = i}
+          end
+        rescue ArgumentError => e
+          STDERR.puts e.message
+        else
+          printf "%x", c
+        end
+      end;
+      Tempfile.create("codepoint") do |f|
+        args = ['-e', code, f.path]
+        f.print data
+        f.close
+        begin
+          assert_in_out_err(args, "", out, err,
+                            "#{bug11444}: #{test} in #{mode} mode",
+                            timeout: 1)
+        rescue Exception => e
+          failure << e
+        end
+      end
+    end
+    unless failure.empty?
+      flunk failure.join("\n---\n")
+    end
+  end
 end

Property changes on: ruby_2_1
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /trunk:r51583,51594,51638


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]