[前][次][番号順一覧][スレッド一覧]

ruby-changes:72622

From: Jean <ko1@a...>
Date: Thu, 21 Jul 2022 22:07:02 +0900 (JST)
Subject: [ruby-changes:72622] f954c5dae4 (master): string.c: use str_enc_fastpath in TERM_LEN

https://git.ruby-lang.org/ruby.git/commit/?id=f954c5dae4

From f954c5dae4c144207bd366cbc832d08930882b23 Mon Sep 17 00:00:00 2001
From: Jean Boussier <jean.boussier@g...>
Date: Thu, 21 Jul 2022 13:16:16 +0200
Subject: string.c: use str_enc_fastpath in TERM_LEN

Not having to fetch the rb_encoding saves a significant
amount of time.

Additionally, even when we have to fetch it, we can do
it faster using `ENCODING_GET` rather than `rb_enc_get`.

```
compare-ruby: ruby 3.2.0dev (2022-07-19T08:41:40Z master cb9fd920a3) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-21T11:16:16Z faster-buffer-conc.. 4f001f0748) [arm64-darwin21]
warming up...

|                      |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|binary_concat_utf8    |    510.580k|  565.600k|
|                      |           -|     1.11x|
|binary_concat_binary  |    512.653k|  571.483k|
|                      |           -|     1.11x|
|utf8_concat_utf8      |    511.396k|  566.879k|
|                      |           -|     1.11x|
```
---
 benchmark/string_concat.yml |  7 ++++---
 string.c                    | 30 +++++++++++++++---------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/benchmark/string_concat.yml b/benchmark/string_concat.yml
index 656bcd1cd7..0ff1dc25b6 100644
--- a/benchmark/string_concat.yml
+++ b/benchmark/string_concat.yml
@@ -1,6 +1,7 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/string_concat.yml#L1
 prelude: |
   CHUNK = "a" * 64
   BCHUNK = "a".b * 64
+  GC.disable # GC causes a lot of variance
 benchmark:
   binary_concat_utf8: |
     buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
@@ -11,7 +12,7 @@ benchmark: https://github.com/ruby/ruby/blob/trunk/benchmark/string_concat.yml#L12
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
-    buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
+    buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
   binary_concat_binary: |
     buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
     buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
@@ -21,7 +22,7 @@ benchmark: https://github.com/ruby/ruby/blob/trunk/benchmark/string_concat.yml#L22
     buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
     buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
     buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
-    buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
+    buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
   utf8_concat_utf8: |
     buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@@ -31,4 +32,4 @@ benchmark: https://github.com/ruby/ruby/blob/trunk/benchmark/string_concat.yml#L32
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
     buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
-    buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
+    buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
diff --git a/string.c b/string.c
index 8f519da7d1..c726adb2c3 100644
--- a/string.c
+++ b/string.c
@@ -150,7 +150,21 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L150
     }\
 } while (0)
 
-#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
+static inline bool
+str_enc_fastpath(VALUE str)
+{
+    // The overwhelming majority of strings are in one of these 3 encodings.
+    switch (ENCODING_GET_INLINED(str)) {
+      case ENCINDEX_ASCII_8BIT:
+      case ENCINDEX_UTF_8:
+      case ENCINDEX_US_ASCII:
+        return true;
+      default:
+        return false;
+    }
+}
+
+#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
 #define TERM_FILL(ptr, termlen) do {\
     char *const term_fill_ptr = (ptr);\
     const int term_fill_len = (termlen);\
@@ -3311,20 +3325,6 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr) https://github.com/ruby/ruby/blob/trunk/string.c#L3325
     }
 }
 
-static inline bool
-str_enc_fastpath(VALUE str)
-{
-    // The overwhelming majority of strings are in one of these 3 encodings.
-    switch (ENCODING_GET_INLINED(str)) {
-      case ENCINDEX_ASCII_8BIT:
-      case ENCINDEX_UTF_8:
-      case ENCINDEX_US_ASCII:
-        return true;
-      default:
-        return false;
-    }
-}
-
 VALUE
 rb_str_buf_append(VALUE str, VALUE str2)
 {
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]