ruby-changes:50722
From: naruse <ko1@a...>
Date: Thu, 22 Mar 2018 20:18:06 +0900 (JST)
Subject: [ruby-changes:50722] naruse:r62896 (ruby_2_5): merge revision(s) 62892, 62893: [Backport #14363]
naruse 2018-03-22 20:18:00 +0900 (Thu, 22 Mar 2018) New Revision: 62896 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=62896 Log: merge revision(s) 62892,62893: [Backport #14363] fix each_grapheme_cluster's size [Bug #14363] From: Hugo Peixoto <hugo.peixoto@g...> Factor out get_reg_grapheme_cluster Modified directories: branches/ruby_2_5/ Modified files: branches/ruby_2_5/string.c branches/ruby_2_5/test/ruby/test_string.rb branches/ruby_2_5/version.h Index: ruby_2_5/test/ruby/test_string.rb =================================================================== --- ruby_2_5/test/ruby/test_string.rb (revision 62895) +++ ruby_2_5/test/ruby/test_string.rb (revision 62896) @@ -980,11 +980,18 @@ CODE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/test/ruby/test_string.rb#L980 "\u{1f469 200d 2764 fe0f 200d 1f469}", ].each do |g| assert_equal [g], g.each_grapheme_cluster.to_a + assert_equal 1, g.each_grapheme_cluster.size + end + + [ + ["\u{a 308}", ["\u000A", "\u0308"]], + ["\u{d 308}", ["\u000D", "\u0308"]], + ["abc", ["a", "b", "c"]], + ].each do |str, grapheme_clusters| + assert_equal grapheme_clusters, str.each_grapheme_cluster.to_a + assert_equal grapheme_clusters.size, str.each_grapheme_cluster.size end - assert_equal ["\u000A", "\u0308"], "\u{a 308}".each_grapheme_cluster.to_a - assert_equal ["\u000D", "\u0308"], "\u{d 308}".each_grapheme_cluster.to_a - assert_equal ["a", "b", "c"], "abc".b.each_grapheme_cluster.to_a s = ("x"+"\u{10ABCD}"*250000) assert_empty(s.each_grapheme_cluster {s.clear}) end Index: ruby_2_5/version.h =================================================================== --- ruby_2_5/version.h (revision 62895) +++ ruby_2_5/version.h (revision 62896) @@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_5/version.h#L1 #define RUBY_VERSION "2.5.1" #define RUBY_RELEASE_DATE "2018-03-22" -#define RUBY_PATCHLEVEL 49 +#define RUBY_PATCHLEVEL 50 #define RUBY_RELEASE_YEAR 2018 #define RUBY_RELEASE_MONTH 3 Index: ruby_2_5/string.c =================================================================== --- ruby_2_5/string.c (revision 62895) +++ ruby_2_5/string.c (revision 62896) @@ -8309,20 +8309,12 @@ rb_str_codepoints(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8309 return rb_str_enumerate_codepoints(str, ary); } -static VALUE -rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) +static regex_t * +get_reg_grapheme_cluster(rb_encoding *enc) { - VALUE orig = str; + int encidx = rb_enc_to_index(enc); regex_t *reg_grapheme_cluster = NULL; static regex_t *reg_grapheme_cluster_utf8 = NULL; - int encidx = ENCODING_GET(str); - rb_encoding *enc = rb_enc_from_index(encidx); - int unicode_p = rb_enc_unicode_p(enc); - const char *ptr, *end; - - if (!unicode_p || single_byte_optimizable(str)) { - return rb_str_enumerate_chars(str, ary); - } /* synchronize */ if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) { @@ -8339,8 +8331,51 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8331 reg_grapheme_cluster_utf8 = reg_grapheme_cluster; } } + return reg_grapheme_cluster; +} + +static VALUE +rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) +{ + size_t grapheme_cluster_count = 0; + regex_t *reg_grapheme_cluster = NULL; + rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); + const char *ptr, *end; + + if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + return rb_str_length(str); + } + + reg_grapheme_cluster = get_reg_grapheme_cluster(enc); + ptr = RSTRING_PTR(str); + end = RSTRING_END(str); + + while (ptr < end) { + OnigPosition len = onig_match(reg_grapheme_cluster, + (const OnigUChar *)ptr, (const OnigUChar *)end, + (const OnigUChar *)ptr, NULL, 0); + if (len <= 0) break; + grapheme_cluster_count++; + ptr += len; + } + + return SIZET2NUM(grapheme_cluster_count); +} + +static VALUE +rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) +{ + VALUE orig = str; + regex_t *reg_grapheme_cluster = NULL; + rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); + const char *ptr, *end; + + if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + return rb_str_enumerate_chars(str, ary); + } if (!ary) str = rb_str_new_frozen(str); + reg_grapheme_cluster = get_reg_grapheme_cluster(enc); ptr = RSTRING_PTR(str); end = RSTRING_END(str); @@ -8348,10 +8383,7 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8383 OnigPosition len = onig_match(reg_grapheme_cluster, (const OnigUChar *)ptr, (const OnigUChar *)end, (const OnigUChar *)ptr, NULL, 0); - if (len == 0) break; - if (len < 0) { - break; - } + if (len <= 0) break; ENUM_ELEM(ary, rb_enc_str_new(ptr, len, enc)); ptr += len; } @@ -8380,7 +8412,7 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8412 static VALUE rb_str_each_grapheme_cluster(VALUE str) { - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size); return rb_str_enumerate_grapheme_clusters(str, 0); } Index: ruby_2_5 =================================================================== --- ruby_2_5 (revision 62895) +++ ruby_2_5 (revision 62896) Property changes on: ruby_2_5 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /trunk:r62892-62893 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/