ruby-changes:45862

naruse	2017-03-13 02:22:20 +0900 (Mon, 13 Mar 2017)

  New Revision: 57935

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=57935

  Log:
    merge revision(s) 57816,57817: [Backport #13292]
    
    fix UTF-32 valid_encoding?
    
    * enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
      [ruby-core:79966] [Bug #13292]
    
    * enc/utf_32le.c (utf32le_mbc_enc_len): ditto.
    
    * regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid
      Unicode codepoints.
    
    fix UTF-32 valid_encoding?
    
    * test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do
      not use invalid codepoint.  [ruby-core:79966] [Bug #13292]

  Modified directories:
    branches/ruby_2_4/
  Modified files:
    branches/ruby_2_4/enc/utf_32be.c
    branches/ruby_2_4/enc/utf_32le.c
    branches/ruby_2_4/regenc.h
    branches/ruby_2_4/test/ruby/enc/test_utf32.rb
    branches/ruby_2_4/test/ruby/test_io_m17n.rb
    branches/ruby_2_4/version.h
Index: ruby_2_4/version.h
===================================================================
--- ruby_2_4/version.h	(revision 57934)
+++ ruby_2_4/version.h	(revision 57935)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_4/version.h#L1
 #define RUBY_VERSION "2.4.0"
 #define RUBY_RELEASE_DATE "2017-03-13"
-#define RUBY_PATCHLEVEL 89
+#define RUBY_PATCHLEVEL 90
 
 #define RUBY_RELEASE_YEAR 2017
 #define RUBY_RELEASE_MONTH 3
Index: ruby_2_4/test/ruby/test_io_m17n.rb
===================================================================
--- ruby_2_4/test/ruby/test_io_m17n.rb	(revision 57934)
+++ ruby_2_4/test/ruby/test_io_m17n.rb	(revision 57935)
@@ -2237,7 +2237,7 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_4/test/ruby/test_io_m17n.rb#L2237
            w.binmode
            w.puts(0x010a.chr(Encoding::UTF_32BE))
            w.puts(0x010a.chr(Encoding::UTF_16BE))
-           w.puts(0x0a010000.chr(Encoding::UTF_32LE))
+           w.puts(0x0a01.chr(Encoding::UTF_32LE))
            w.puts(0x0a01.chr(Encoding::UTF_16LE))
            w.close
          end,
@@ -2245,7 +2245,7 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_4/test/ruby/test_io_m17n.rb#L2245
            r.binmode
            assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
            assert_equal("\x01\x0a\n", r.read(3), bug)
-           assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
+           assert_equal("\x01\x0a\x00\x00\n", r.read(5), bug)
            assert_equal("\x01\x0a\n", r.read(3), bug)
            assert_equal("", r.read, bug)
            r.close
Index: ruby_2_4/test/ruby/enc/test_utf32.rb
===================================================================
--- ruby_2_4/test/ruby/enc/test_utf32.rb	(revision 57934)
+++ ruby_2_4/test/ruby/enc/test_utf32.rb	(revision 57935)
@@ -90,5 +90,73 @@ EOT https://github.com/ruby/ruby/blob/trunk/ruby_2_4/test/ruby/enc/test_utf32.rb#L90
     assert_equal(sl, "a".ord.chr("utf-32le"))
     assert_equal(sb, "a".ord.chr("utf-32be"))
   end
+
+  def test_utf32be_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "\x00\x00\x00a",
+        "\x00\x00\x30\x40",
+        "\x00\x00\xd7\xff",
+        "\x00\x00\xe0\x00",
+        "\x00\x00\xff\xff",
+        "\x00\x10\xff\xff",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "\x00a",
+        "\x00\x00a",
+        "\x00\x00\xd8\x00",
+        "\x00\x00\xdb\xff",
+        "\x00\x00\xdc\x00",
+        "\x00\x00\xdf\xff",
+        "\x00\x11\x00\x00",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+
+  def test_utf32le_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "a\x00\x00\x00",
+        "\x40\x30\x00\x00",
+        "\xff\xd7\x00\x00",
+        "\x00\xe0\x00\x00",
+        "\xff\xff\x00\x00",
+        "\xff\xff\x10\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "a\x00",
+        "a\x00\x00",
+        "\x00\xd8\x00\x00",
+        "\xff\xdb\x00\x00",
+        "\x00\xdc\x00\x00",
+        "\xff\xdf\x00\x00",
+        "\x00\x00\x11\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
 end
 
Index: ruby_2_4/regenc.h
===================================================================
--- ruby_2_4/regenc.h	(revision 57934)
+++ ruby_2_4/regenc.h	(revision 57935)
@@ -186,6 +186,9 @@ ONIG_EXTERN int onigenc_unicode_apply_al https://github.com/ruby/ruby/blob/trunk/ruby_2_4/regenc.h#L186
 #define UTF16_IS_SURROGATE_FIRST(c)    (((c) & 0xfc) == 0xd8)
 #define UTF16_IS_SURROGATE_SECOND(c)   (((c) & 0xfc) == 0xdc)
 #define UTF16_IS_SURROGATE(c)          (((c) & 0xf8) == 0xd8)
+#define UNICODE_VALID_CODEPOINT_P(c) ( \
+	((c) <= 0x10ffff) && \
+	!((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
 
 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
   OnigEncISO_8859_1_ToLowerCaseTable[c]
Index: ruby_2_4/enc/utf_32le.c
===================================================================
--- ruby_2_4/enc/utf_32le.c	(revision 57934)
+++ ruby_2_4/enc/utf_32le.c	(revision 57935)
@@ -30,11 +30,23 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_4/enc/utf_32le.c#L30
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-		    OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+		    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int
Index: ruby_2_4/enc/utf_32be.c
===================================================================
--- ruby_2_4/enc/utf_32be.c	(revision 57934)
+++ ruby_2_4/enc/utf_32be.c	(revision 57935)
@@ -30,11 +30,23 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_4/enc/utf_32be.c#L30
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-		    OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+		    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int

Property changes on: ruby_2_4
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /trunk:r57816-57817


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/