ruby-changes:39534
From: usa <ko1@a...>
Date: Mon, 17 Aug 2015 17:59:07 +0900 (JST)
Subject: [ruby-changes:39534] usa:r51615 (ruby_2_1): merge revision(s) 51470: [Backport #11413]
usa 2015-08-17 17:58:50 +0900 (Mon, 17 Aug 2015) New Revision: 51615 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51615 Log: merge revision(s) 51470: [Backport #11413] * re.c (rb_memsearch): should match only char boundaries in wide character encodings. [ruby-core:70220] [Bug #11413] Modified directories: branches/ruby_2_1/ Modified files: branches/ruby_2_1/ChangeLog branches/ruby_2_1/re.c branches/ruby_2_1/string.c branches/ruby_2_1/test/ruby/test_m17n.rb branches/ruby_2_1/version.h Index: ruby_2_1/ChangeLog =================================================================== --- ruby_2_1/ChangeLog (revision 51614) +++ ruby_2_1/ChangeLog (revision 51615) @@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/ChangeLog#L1 +Mon Aug 17 17:57:12 2015 Nobuyoshi Nakada <nobu@r...> + + * re.c (rb_memsearch): should match only char boundaries in wide + character encodings. [ruby-core:70220] [Bug #11413] + Mon Aug 17 17:54:33 2015 Nobuyoshi Nakada <nobu@r...> * transcode.c (rb_econv_set_replacement): target encoding name can Index: ruby_2_1/re.c =================================================================== --- ruby_2_1/re.c (revision 51614) +++ ruby_2_1/re.c (revision 51615) @@ -223,6 +223,32 @@ rb_memsearch_qs_utf8(const unsigned char https://github.com/ruby/ruby/blob/trunk/ruby_2_1/re.c#L223 return -1; } +static inline long +rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 2}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + +static inline long +rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 4}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) { @@ -243,15 +269,21 @@ rb_memsearch(const void *x0, long m, con https://github.com/ruby/ruby/blob/trunk/ruby_2_1/re.c#L269 else return -1; } - else if (m <= SIZEOF_VALUE) { - return rb_memsearch_ss(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 1) { + if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } } - else if (enc == rb_utf8_encoding()){ - return rb_memsearch_qs_utf8(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 2) { + return rb_memsearch_wchar(x0, m, y0, n); } - else { - return rb_memsearch_qs(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 4) { + return rb_memsearch_qchar(x0, m, y0, n); } + return rb_memsearch_qs(x0, m, y0, n); } #define REG_LITERAL FL_USER5 Index: ruby_2_1/string.c =================================================================== --- ruby_2_1/string.c (revision 51614) +++ ruby_2_1/string.c (revision 51615) @@ -6213,15 +6213,10 @@ rb_str_split_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_1/string.c#L6213 } enc = STR_ENC_GET(str); - if (NIL_P(spat)) { - if (!NIL_P(rb_fs)) { - spat = rb_fs; - goto fs_set; - } + if (NIL_P(spat) && NIL_P(spat = rb_fs)) { split_type = awk; } else { - fs_set: if (RB_TYPE_P(spat, T_STRING)) { rb_encoding *enc2 = STR_ENC_GET(spat); Index: ruby_2_1/version.h =================================================================== --- ruby_2_1/version.h (revision 51614) +++ ruby_2_1/version.h (revision 51615) @@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_1/version.h#L1 #define RUBY_VERSION "2.1.7" #define RUBY_RELEASE_DATE "2015-08-17" -#define RUBY_PATCHLEVEL 397 +#define RUBY_PATCHLEVEL 398 #define RUBY_RELEASE_YEAR 2015 #define RUBY_RELEASE_MONTH 8 Index: ruby_2_1/test/ruby/test_m17n.rb =================================================================== --- ruby_2_1/test/ruby/test_m17n.rb (revision 51614) +++ ruby_2_1/test/ruby/test_m17n.rb (revision 51615) @@ -1226,6 +1226,9 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_1/test/ruby/test_m17n.rb#L1226 each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected| assert_equal(expected, str.split(sep, -1)) end + each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected| + assert_equal(expected, str.split(sep, -1)) + end end def test_nonascii_method_name Property changes on: ruby_2_1 ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk:r51470 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/