ruby-changes:39509
From: nagachika <ko1@a...>
Date: Sun, 16 Aug 2015 03:31:02 +0900 (JST)
Subject: [ruby-changes:39509] nagachika:r51590 (ruby_2_2): merge revision(s) 51470: [Backport #11413]
nagachika 2015-08-16 03:30:35 +0900 (Sun, 16 Aug 2015)

  New Revision: 51590

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51590

  Log:
    merge revision(s) 51470: [Backport #11413]

    * re.c (rb_memsearch): should match only char boundaries in wide
      character encodings. [ruby-core:70220] [Bug #11413]

  Modified directories:
    branches/ruby_2_2/
  Modified files:
    branches/ruby_2_2/ChangeLog
    branches/ruby_2_2/re.c
    branches/ruby_2_2/string.c
    branches/ruby_2_2/test/ruby/test_m17n.rb
    branches/ruby_2_2/version.h
Index: ruby_2_2/re.c
===================================================================
--- ruby_2_2/re.c (revision 51589)
+++ ruby_2_2/re.c (revision 51590)
@@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char https://github.com/ruby/ruby/blob/trunk/ruby_2_2/re.c#L221
     return -1;
 }
 
+static inline long
+rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, x0 = *xs, *y = ys;
+    enum {char_size = 2};
+
+    for (n -= m; n > 0; n -= char_size, y += char_size) {
+        if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+            return y - ys;
+    }
+    return -1;
+}
+
+static inline long
+rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, x0 = *xs, *y = ys;
+    enum {char_size = 4};
+
+    for (n -= m; n > 0; n -= char_size, y += char_size) {
+        if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+            return y - ys;
+    }
+    return -1;
+}
+
 long
 rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
 {
@@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, con https://github.com/ruby/ruby/blob/trunk/ruby_2_2/re.c#L267
         else
             return -1;
     }
-    else if (m <= SIZEOF_VALUE) {
-        return rb_memsearch_ss(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
+        if (m <= SIZEOF_VALUE) {
+            return rb_memsearch_ss(x0, m, y0, n);
+        }
+        else if (enc == rb_utf8_encoding()){
+            return rb_memsearch_qs_utf8(x0, m, y0, n);
+        }
     }
-    else if (enc == rb_utf8_encoding()){
-        return rb_memsearch_qs_utf8(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
+        return rb_memsearch_wchar(x0, m, y0, n);
     }
-    else {
-        return rb_memsearch_qs(x0, m, y0, n);
+    else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
+        return rb_memsearch_qchar(x0, m, y0, n);
     }
+    return rb_memsearch_qs(x0, m, y0, n);
 }
 
 #define REG_LITERAL FL_USER5
Index: ruby_2_2/ChangeLog
===================================================================
--- ruby_2_2/ChangeLog (revision 51589)
+++ ruby_2_2/ChangeLog (revision 51590)
@@ -1,3 +1,8 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/ChangeLog#L1
+Sun Aug 16 03:14:04 2015 Nobuyoshi Nakada <nobu@r...>
+
+        * re.c (rb_memsearch): should match only char boundaries in wide
+          character encodings. [ruby-core:70220] [Bug #11413]
+
 Sun Aug 16 03:00:44 2015 Eric Wong <e@8...>
 
         * symbol.h (struct RSymbol): add hashval field
Index: ruby_2_2/string.c
===================================================================
--- ruby_2_2/string.c (revision 51589)
+++ ruby_2_2/string.c (revision 51590)
@@ -6373,15 +6373,10 @@ rb_str_split_m(int argc, VALUE *argv, VA https://github.com/ruby/ruby/blob/trunk/ruby_2_2/string.c#L6373
     }
 
     enc = STR_ENC_GET(str);
-    if (NIL_P(spat)) {
-        if (!NIL_P(rb_fs)) {
-            spat = rb_fs;
-            goto fs_set;
-        }
+    if (NIL_P(spat) && NIL_P(spat = rb_fs)) {
         split_type = awk;
     }
     else {
-      fs_set:
         spat = get_pat_quoted(spat, 0);
         if (BUILTIN_TYPE(spat) == T_STRING) {
             rb_encoding *enc2 = STR_ENC_GET(spat);
Index: ruby_2_2/version.h
===================================================================
--- ruby_2_2/version.h (revision 51589)
+++ ruby_2_2/version.h (revision 51590)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_2/version.h#L1
 #define RUBY_VERSION "2.2.3"
 #define RUBY_RELEASE_DATE "2015-08-16"
-#define RUBY_PATCHLEVEL 168
+#define RUBY_PATCHLEVEL 169
 
 #define RUBY_RELEASE_YEAR 2015
 #define RUBY_RELEASE_MONTH 8
Index: ruby_2_2/test/ruby/test_m17n.rb
===================================================================
--- ruby_2_2/test/ruby/test_m17n.rb (revision 51589)
+++ ruby_2_2/test/ruby/test_m17n.rb (revision 51590)
@@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/ruby_2_2/test/ruby/test_m17n.rb#L1236
     each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected|
       assert_equal(expected, str.split(sep, -1))
     end
+    each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected|
+      assert_equal(expected, str.split(sep, -1))
+    end
   end
 
   def test_nonascii_method_name

Property changes on: ruby_2_2
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /trunk:r51470

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/