[前][次][番号順一覧][スレッド一覧]

ruby-changes:39509

From: nagachika <ko1@a...>
Date: Sun, 16 Aug 2015 03:31:02 +0900 (JST)
Subject: [ruby-changes:39509] nagachika:r51590 (ruby_2_2): merge revision(s) 51470: [Backport #11413]

nagachika	2015-08-16 03:30:35 +0900 (Sun, 16 Aug 2015)

  New Revision: 51590

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51590

  Log:
    merge revision(s) 51470: [Backport #11413]
    
    * re.c (rb_memsearch): should match only char boundaries in wide
      character encodings.  [ruby-core:70220] [Bug #11413]

  Modified directories:
    branches/ruby_2_2/
  Modified files:
    branches/ruby_2_2/ChangeLog
    branches/ruby_2_2/re.c
    branches/ruby_2_2/string.c
    branches/ruby_2_2/test/ruby/test_m17n.rb
    branches/ruby_2_2/version.h
Index: ruby_2_2/re.c
===================================================================
--- ruby_2_2/re.c	(revision 51589)
+++ ruby_2_2/re.c	(revision 51590)
@@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char https://github.com/ruby/ruby/blob/trunk/ruby_2_2/re.c#L221
     return -1;
 }
 
+/* Find xs[0,m) in ys[0,n), trying only offsets that are multiples of 2 bytes from ys; returns the byte offset or -1. */
+static inline long
+rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, x0 = *xs, *y = ys;
+    enum {char_size = 2};	/* distance in bytes between candidate start offsets */
+    for (n -= m; n >= 0; n -= char_size, y += char_size) {	/* n = bytes left past the window; 0 is the window ending at ys' end */
+	if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+	    return y - ys;
+    }
+    return -1;
+}
+
+/* Find xs[0,m) in ys[0,n), trying only offsets that are multiples of 4 bytes from ys; returns the byte offset or -1. */
+static inline long
+rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, x0 = *xs, *y = ys;
+    enum {char_size = 4};	/* distance in bytes between candidate start offsets */
+    for (n -= m; n >= 0; n -= char_size, y += char_size) {	/* n = bytes left past the window; 0 is the window ending at ys' end */
+	if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+	    return y - ys;
+    }
+    return -1;
+}
+
 long
 rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
 {
@@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, con https://github.com/ruby/ruby/blob/trunk/ruby_2_2/re.c#L267
 	else
 	    return -1;
     }
-    else if (m <= SIZEOF_VALUE) {
-	return rb_memsearch_ss(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
+	if (m <= SIZEOF_VALUE) {
+	    return rb_memsearch_ss(x0, m, y0, n);
+	}
+	else if (enc == rb_utf8_encoding()){
+	    return rb_memsearch_qs_utf8(x0, m, y0, n);
+	}
     }
-    else if (enc == rb_utf8_encoding()){
-	return rb_memsearch_qs_utf8(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
+	return rb_memsearch_wchar(x0, m, y0, n);
     }
-    else {
-	return rb_memsearch_qs(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
+	return rb_memsearch_qchar(x0, m, y0, n);
     }
+    return rb_memsearch_qs(x0, m, y0, n);
 }
 
 #define REG_LITERAL FL_USER5
Index: ruby_2_2/ChangeLog
===================================================================
--- ruby_2_2/ChangeLog	(revision 51589)
+++ ruby_2_2/ChangeLog	(revision 51590)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/ChangeLog#L1
+Sun Aug 16 03:14:04 2015  Nobuyoshi Nakada  <nobu@r...>
+
+	* re.c (rb_memsearch): should match only char boundaries in wide
+	  character encodings.  [ruby-core:70220] [Bug #11413]
+
 Sun Aug 16 03:00:44 2015  Eric Wong  <e@8...>
 
 	* symbol.h (struct RSymbol): add hashval field
Index: ruby_2_2/string.c
===================================================================
--- ruby_2_2/string.c	(revision 51589)
+++ ruby_2_2/string.c	(revision 51590)
@@ -6373,15 +6373,10 @@ rb_str_split_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L6373
     }
 
     enc = STR_ENC_GET(str);
-    if (NIL_P(spat)) {
-	if (!NIL_P(rb_fs)) {
-	    spat = rb_fs;
-	    goto fs_set;
-	}
+    if (NIL_P(spat) && NIL_P(spat = rb_fs)) {
 	split_type = awk;
     }
     else {
-      fs_set:
 	spat = get_pat_quoted(spat, 0);
 	if (BUILTIN_TYPE(spat) == T_STRING) {
 	    rb_encoding *enc2 = STR_ENC_GET(spat);
Index: ruby_2_2/version.h
===================================================================
--- ruby_2_2/version.h	(revision 51589)
+++ ruby_2_2/version.h	(revision 51590)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/version.h#L1
 #define RUBY_VERSION "2.2.3"
 #define RUBY_RELEASE_DATE "2015-08-16"
-#define RUBY_PATCHLEVEL 168
+#define RUBY_PATCHLEVEL 169
 
 #define RUBY_RELEASE_YEAR 2015
 #define RUBY_RELEASE_MONTH 8
Index: ruby_2_2/test/ruby/test_m17n.rb
===================================================================
--- ruby_2_2/test/ruby/test_m17n.rb	(revision 51589)
+++ ruby_2_2/test/ruby/test_m17n.rb	(revision 51590)
@@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_m17n.rb#L1236
     each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected|
       assert_equal(expected, str.split(sep, -1))
     end
+    each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected|
+      assert_equal(expected, str.split(sep, -1))
+    end
   end
 
   def test_nonascii_method_name

Property changes on: ruby_2_2
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /trunk:r51470


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]