[前][次][番号順一覧][スレッド一覧]

ruby-changes:50722

From: naruse <ko1@a...>
Date: Thu, 22 Mar 2018 20:18:06 +0900 (JST)
Subject: [ruby-changes:50722] naruse:r62896 (ruby_2_5): merge revision(s) 62892, 62893: [Backport #14363]

naruse	2018-03-22 20:18:00 +0900 (Thu, 22 Mar 2018)

  New Revision: 62896

  https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=62896

  Log:
    merge revision(s) 62892,62893: [Backport #14363]
    
    fix each_grapheme_cluster's size [Bug #14363]
    
    From: Hugo Peixoto <hugo.peixoto@g...>
    
    Factor out get_reg_grapheme_cluster

  Modified directories:
    branches/ruby_2_5/
  Modified files:
    branches/ruby_2_5/string.c
    branches/ruby_2_5/test/ruby/test_string.rb
    branches/ruby_2_5/version.h
Index: ruby_2_5/test/ruby/test_string.rb
===================================================================
--- ruby_2_5/test/ruby/test_string.rb	(revision 62895)
+++ ruby_2_5/test/ruby/test_string.rb	(revision 62896)
@@ -980,11 +980,18 @@ CODE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/test/ruby/test_string.rb#L980
       "\u{1f469 200d 2764 fe0f 200d 1f469}",
     ].each do |g|
       assert_equal [g], g.each_grapheme_cluster.to_a
+      assert_equal 1, g.each_grapheme_cluster.size
+    end
+
+    [
+      ["\u{a 308}", ["\u000A", "\u0308"]],
+      ["\u{d 308}", ["\u000D", "\u0308"]],
+      ["abc", ["a", "b", "c"]],
+    ].each do |str, grapheme_clusters|
+      assert_equal grapheme_clusters, str.each_grapheme_cluster.to_a
+      assert_equal grapheme_clusters.size, str.each_grapheme_cluster.size
     end
 
-    assert_equal ["\u000A", "\u0308"], "\u{a 308}".each_grapheme_cluster.to_a
-    assert_equal ["\u000D", "\u0308"], "\u{d 308}".each_grapheme_cluster.to_a
-    assert_equal ["a", "b", "c"], "abc".b.each_grapheme_cluster.to_a
     s = ("x"+"\u{10ABCD}"*250000)
     assert_empty(s.each_grapheme_cluster {s.clear})
   end
Index: ruby_2_5/version.h
===================================================================
--- ruby_2_5/version.h	(revision 62895)
+++ ruby_2_5/version.h	(revision 62896)
@@ -1,6 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/ruby_2_5/version.h#L1
 #define RUBY_VERSION "2.5.1"
 #define RUBY_RELEASE_DATE "2018-03-22"
-#define RUBY_PATCHLEVEL 49
+#define RUBY_PATCHLEVEL 50
 
 #define RUBY_RELEASE_YEAR 2018
 #define RUBY_RELEASE_MONTH 3
Index: ruby_2_5/string.c
===================================================================
--- ruby_2_5/string.c	(revision 62895)
+++ ruby_2_5/string.c	(revision 62896)
@@ -8309,20 +8309,12 @@ rb_str_codepoints(VALUE str) https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8309
     return rb_str_enumerate_codepoints(str, ary);
 }
 
-static VALUE
-rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
+static regex_t *
+get_reg_grapheme_cluster(rb_encoding *enc)
 {
-    VALUE orig = str;
+    int encidx = rb_enc_to_index(enc);
     regex_t *reg_grapheme_cluster = NULL;
     static regex_t *reg_grapheme_cluster_utf8 = NULL;
-    int encidx = ENCODING_GET(str);
-    rb_encoding *enc = rb_enc_from_index(encidx);
-    int unicode_p = rb_enc_unicode_p(enc);
-    const char *ptr, *end;
-
-    if (!unicode_p || single_byte_optimizable(str)) {
-	return rb_str_enumerate_chars(str, ary);
-    }
 
     /* synchronize */
     if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
@@ -8339,8 +8331,51 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8331
 	    reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
 	}
     }
+    return reg_grapheme_cluster;
+}
+
+static VALUE
+rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
+{
+    size_t grapheme_cluster_count = 0;
+    regex_t *reg_grapheme_cluster = NULL;
+    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
+    const char *ptr, *end;
+
+    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+	return rb_str_length(str);
+    }
+
+    reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
+    ptr = RSTRING_PTR(str);
+    end = RSTRING_END(str);
+
+    while (ptr < end) {
+	OnigPosition len = onig_match(reg_grapheme_cluster,
+				      (const OnigUChar *)ptr, (const OnigUChar *)end,
+				      (const OnigUChar *)ptr, NULL, 0);
+	if (len <= 0) break;
+	grapheme_cluster_count++;
+	ptr += len;
+    }
+
+    return SIZET2NUM(grapheme_cluster_count);
+}
+
+static VALUE
+rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
+{
+    VALUE orig = str;
+    regex_t *reg_grapheme_cluster = NULL;
+    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
+    const char *ptr, *end;
+
+    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+	return rb_str_enumerate_chars(str, ary);
+    }
 
     if (!ary) str = rb_str_new_frozen(str);
+    reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
     ptr = RSTRING_PTR(str);
     end = RSTRING_END(str);
 
@@ -8348,10 +8383,7 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8383
 	OnigPosition len = onig_match(reg_grapheme_cluster,
 				      (const OnigUChar *)ptr, (const OnigUChar *)end,
 				      (const OnigUChar *)ptr, NULL, 0);
-	if (len == 0) break;
-	if (len < 0) {
-	    break;
-	}
+	if (len <= 0) break;
 	ENUM_ELEM(ary, rb_enc_str_new(ptr, len, enc));
 	ptr += len;
     }
@@ -8380,7 +8412,7 @@ rb_str_enumerate_grapheme_clusters(VALUE https://github.com/ruby/ruby/blob/trunk/ruby_2_5/string.c#L8412
 static VALUE
 rb_str_each_grapheme_cluster(VALUE str)
 {
-    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
+    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
     return rb_str_enumerate_grapheme_clusters(str, 0);
 }
 
Index: ruby_2_5
===================================================================
--- ruby_2_5	(revision 62895)
+++ ruby_2_5	(revision 62896)

Property changes on: ruby_2_5
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /trunk:r62892-62893

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]