[前][次][番号順一覧][スレッド一覧]

ruby-changes:55652

From: Nobuyoshi <ko1@a...>
Date: Fri, 3 May 2019 23:59:58 +0900 (JST)
Subject: [ruby-changes:55652] Nobuyoshi Nakada: 77440e949b (trunk): Improve performance of case-conversion methods

https://git.ruby-lang.org/ruby.git/commit/?id=77440e949b

From 77440e949bd69e6ed86d70026d238521adb8319a Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@r...>
Date: Fri, 3 May 2019 22:37:51 +0900
Subject: Improve performance of case-conversion methods


diff --git a/benchmark/string_capitalize.yml b/benchmark/string_capitalize.yml
new file mode 100644
index 0000000..7d23fd3
--- /dev/null
+++ b/benchmark/string_capitalize.yml
@@ -0,0 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/string_capitalize.yml#L1
+prelude: |
+  str1 = [*"a".."m",*"N".."Z",*"0".."9"].join("")
+  str10 = str1 * 10
+  str100 = str10 * 10
+  str1000 = str100 * 10
+benchmark: # String#capitalize on mixed-case inputs of 36, 360, 3600 and 36000 characters
+  capitalize-1: str1.capitalize
+  capitalize-10: str10.capitalize
+  capitalize-100: str100.capitalize
+  capitalize-1000: str1000.capitalize
diff --git a/benchmark/string_downcase.yml b/benchmark/string_downcase.yml
new file mode 100644
index 0000000..a31c3ac
--- /dev/null
+++ b/benchmark/string_downcase.yml
@@ -0,0 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/string_downcase.yml#L1
+prelude: |
+  str1 = [*"A".."Z",*"0".."9"].join("")
+  str10 = str1 * 10
+  str100 = str10 * 10
+  str1000 = str100 * 10
+benchmark: # upper-case input, so String#downcase has to rewrite every letter
+  downcase-1: str1.downcase
+  downcase-10: str10.downcase
+  downcase-100: str100.downcase
+  downcase-1000: str1000.downcase
diff --git a/benchmark/string_swapcase.yml b/benchmark/string_swapcase.yml
new file mode 100644
index 0000000..afaae3f
--- /dev/null
+++ b/benchmark/string_swapcase.yml
@@ -0,0 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/string_swapcase.yml#L1
+prelude: |
+  str1 = [*"A".."M",*"n".."z",*"0".."9"].join("")
+  str10 = str1 * 10
+  str100 = str10 * 10
+  str1000 = str100 * 10
+benchmark: # String#swapcase on mixed-case inputs of 36, 360, 3600 and 36000 characters
+  swapcase-1: str1.swapcase
+  swapcase-10: str10.swapcase
+  swapcase-100: str100.swapcase
+  swapcase-1000: str1000.swapcase
diff --git a/benchmark/string_upcase.yml b/benchmark/string_upcase.yml
new file mode 100644
index 0000000..456d213
--- /dev/null
+++ b/benchmark/string_upcase.yml
@@ -0,0 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/string_upcase.yml#L1
+prelude: |
+  str1 = [*"a".."z",*"0".."9"].join("")
+  str10 = str1 * 10
+  str100 = str10 * 10
+  str1000 = str100 * 10
+benchmark: # String#upcase on lower-case inputs of 36, 360, 3600 and 36000 characters
+  upcase-1: str1.upcase
+  upcase-10: str10.upcase
+  upcase-100: str100.upcase
+  upcase-1000: str1000.upcase
diff --git a/string.c b/string.c
index 2febe52..76d8c56 100644
--- a/string.c
+++ b/string.c
@@ -6408,6 +6408,14 @@ rb_str_check_dummy_enc(rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/string.c#L6408
     }
 }
 
+static rb_encoding *
+str_true_enc(VALUE str) /* returns str's encoding after passing it through rb_str_check_dummy_enc */
+{
+    rb_encoding *enc = STR_ENC_GET(str);
+    rb_str_check_dummy_enc(enc); /* NOTE(review): presumably raises for dummy encodings -- confirm against its definition */
+    return enc;
+}
+
 static OnigCaseFoldType
 check_case_options(int argc, VALUE *argv, OnigCaseFoldType flags)
 {
@@ -6448,6 +6456,14 @@ check_case_options(int argc, VALUE *argv, OnigCaseFoldType flags) https://github.com/ruby/ruby/blob/trunk/string.c#L6456
     return flags;
 }
 
+static inline bool
+case_option_single_p(OnigCaseFoldType flags, rb_encoding *enc, VALUE str) /* true when callers may take the byte-at-a-time *_single path */
+{
+    if ((flags & ONIGENC_CASE_ASCII_ONLY) && (rb_enc_mbmaxlen(enc) == 1)) /* ASCII-only mapping requested and rb_enc_mbmaxlen(enc) is 1 */
+        return true;
+    return !(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT; /* otherwise: no Turkic folding and a 7BIT code range */
+}
+
 /* 16 should be long enough to absorb any kind of single character length increase */
 #define CASE_MAPPING_ADDITIONAL_LENGTH 20
 #ifndef CASEMAP_DEBUG
@@ -6484,7 +6500,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/string.c#L6500
 {
     VALUE target;
 
-    OnigUChar *source_current, *source_end;
+    const OnigUChar *source_current, *source_end;
     int target_length = 0;
     VALUE buffer_anchor;
     mapping_buffer *current_buffer = 0;
@@ -6554,21 +6570,30 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/string.c#L6570
     return target;
 }
 
-static void
-rb_str_ascii_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
+static VALUE
+rb_str_ascii_casemap(VALUE source, VALUE target, OnigCaseFoldType *flags, rb_encoding *enc)
 {
-    OnigUChar *source_current, *source_end;
+    const OnigUChar *source_current, *source_end;
+    OnigUChar *target_current, *target_end;
     long old_length = RSTRING_LEN(source);
     int length_or_invalid;
 
-    if (old_length == 0) return;
+    if (old_length == 0) return Qnil;
 
     source_current = (OnigUChar*)RSTRING_PTR(source);
     source_end = (OnigUChar*)RSTRING_END(source);
+    if (source == target) {
+        target_current = (OnigUChar*)source_current;
+        target_end = (OnigUChar*)source_end;
+    }
+    else {
+        target_current = (OnigUChar*)RSTRING_PTR(target);
+        target_end = (OnigUChar*)RSTRING_END(target);
+    }
 
     length_or_invalid = onigenc_ascii_only_case_map(flags,
-			       (const OnigUChar**)&source_current, source_end,
-			       source_current, source_end, enc);
+			       &source_current, source_end,
+			       target_current, target_end, enc);
     if (length_or_invalid < 0)
         rb_raise(rb_eArgError, "input string invalid");
     if (CASEMAP_DEBUG && length_or_invalid != old_length) {
@@ -6577,6 +6602,29 @@ rb_str_ascii_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) https://github.com/ruby/ruby/blob/trunk/string.c#L6602
 	rb_raise(rb_eArgError, "internal problem with rb_str_ascii_casemap"
 		 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
     }
+
+    OBJ_INFECT_RAW(target, source);
+    str_enc_copy(target, source);
+
+    return target;
+}
+
+static bool
+upcase_single(VALUE str) /* upcases ASCII letters of str in place; returns true if any byte changed */
+{
+    char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
+    bool modified = false;
+
+    while (s < send) {
+        unsigned int c = *(unsigned char*)s;
+
+        if ('a' <= c && c <= 'z') { /* the range test already implies ASCII; no `enc` exists in this scope */
+            *s = 'A' + (c - 'a');
+            modified = true;
+        }
+        s++;
+    }
+    return modified;
}
 
 /*
@@ -6598,24 +6646,13 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L6646
 
     flags = check_case_options(argc, argv, flags);
     str_modify_keep_cr(str);
-    enc = STR_ENC_GET(str);
-    rb_str_check_dummy_enc(enc);
-    if (((flags&ONIGENC_CASE_ASCII_ONLY) && (rb_enc_mbmaxlen(enc)==1))
-	|| (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
-        char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
-
-	while (s < send) {
-	    unsigned int c = *(unsigned char*)s;
-
-	    if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
-		*s = 'A' + (c - 'a');
-		flags |= ONIGENC_CASE_MODIFIED;
-	    }
-	    s++;
-	}
+    enc = str_true_enc(str);
+    if (case_option_single_p(flags, enc, str)) {
+        if (upcase_single(str))
+            flags |= ONIGENC_CASE_MODIFIED;
     }
     else if (flags&ONIGENC_CASE_ASCII_ONLY)
-        rb_str_ascii_casemap(str, &flags, enc);
+        rb_str_ascii_casemap(str, str, &flags, enc);
     else
 	str_shared_replace(str, rb_str_casemap(str, &flags, enc));
 
@@ -6640,9 +6677,46 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L6677
 static VALUE
 rb_str_upcase(int argc, VALUE *argv, VALUE str)
 {
-    str = rb_str_dup(str);
-    rb_str_upcase_bang(argc, argv, str);
-    return str;
+    rb_encoding *enc;
+    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
+    VALUE ret;
+
+    flags = check_case_options(argc, argv, flags);
+    enc = str_true_enc(str);
+    if (case_option_single_p(flags, enc, str)) { /* byte-wise path: copy str, then upcase the copy in place */
+        ret = rb_str_new_with_class(str, RSTRING_PTR(str), RSTRING_LEN(str));
+        OBJ_INFECT_RAW(ret, str);
+        str_enc_copy(ret, str);
+        upcase_single(ret);
+    }
+    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
+        ret = rb_str_new_with_class(str, 0, RSTRING_LEN(str)); /* same-length target for rb_str_ascii_casemap to write into */
+        rb_str_ascii_casemap(str, ret, &flags, enc);
+    }
+    else {
+        ret = rb_str_casemap(str, &flags, enc); /* general mapping hands back its own result string */
+    }
+
+    return ret;
+}
+
+static bool
+downcase_single(VALUE str) /* downcases ASCII letters of str in place; returns true if any byte changed */
+{
+    char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
+    bool modified = false;
+
+    while (s < send) {
+        unsigned int c = *(unsigned char*)s;
+
+        if ('A' <= c && c <= 'Z') { /* the range test already implies ASCII; no `enc` exists in this scope */
+            *s = 'a' + (c - 'A');
+            modified = true;
+        }
+        s++;
+    }
+
+    return modified;
}
 
 /*
@@ -6664,24 +6738,13 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L6738
 
     flags = check_case_options(argc, argv, flags);
     str_modify_keep_cr(str);
-    enc = STR_ENC_GET(str);
-    rb_str_check_dummy_enc(enc);
-    if (((flags&ONIGENC_CASE_ASCII_ONLY) && (rb_enc_mbmaxlen(enc)==1))
-	|| (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str)==ENC_CODERANGE_7BIT)) {
-        char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
-
-	while (s < send) {
-	    unsigned int c = *(unsigned char*)s;
-
-	    if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
-		*s = 'a' + (c - 'A');
-		flags |= ONIGENC_CASE_MODIFIED;
-	    }
-	    s++;
-	}
+    enc = str_true_enc(str);
+    if (case_option_single_p(flags, enc, str)) {
+        if (downcase_single(str))
+            flags |= ONIGENC_CASE_MODIFIED;
     }
     else if (flags&ONIGENC_CASE_ASCII_ONLY)
-        rb_str_ascii_casemap(str, &flags, enc);
+        rb_str_ascii_casemap(str, str, &flags, enc);
     else
 	str_shared_replace(str, rb_str_casemap(str, &flags, enc));
 
@@ -6743,9 +6806,27 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) https://github.com/ruby/ruby/blob/trunk/string.c#L6806
 static VALUE
 rb_str_downcase(int argc, VALUE *argv, VALUE str)
 {
-    str = rb_str_dup(str);
-    rb_str_downcase_bang(argc, argv, str);
-    return str;
+    rb_encoding *enc;
+    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
+    VALUE ret;
+
+    flags = check_case_options(argc, argv, flag (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]