
ruby-changes:58618

From: 卜部昌平 <ko1@a...>
Date: Thu, 7 Nov 2019 17:41:49 +0900 (JST)
Subject: [ruby-changes:58618] d45a013a1a (master): extend rb_call_cache

https://git.ruby-lang.org/ruby.git/commit/?id=d45a013a1a

From d45a013a1a3bcc860e6f7f303220b3297e2abdbc Mon Sep 17 00:00:00 2001
From: 卜部昌平 <shyouhei@r...>
Date: Mon, 7 Oct 2019 12:59:57 +0900
Subject: extend rb_call_cache

Prior to this changeset, the majority of inline cache misses resolved
to the same method entry once rb_callable_method_entry() redid the
method search.  Let's not call the function in the first place in
such situations.

In doing so we extend struct rb_call_cache from 44 bytes (on a
64-bit machine) to 64 bytes, and fill the gap with secondary class
serial(s).  The call cache's class serials now behave as an LRU
cache.
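
To see where the extra slots come from, here is a rough sketch of the
size arithmetic, assuming a typical LP64 machine (8-byte pointers and
rb_serial_t, 4-byte enum); the authoritative expression is the
sizeof-based one in the internal.h hunk below:

    /* CACHELINE              64
     * - method_state          8
     * - me                    8
     * - def                   8
     * - aux (enum)            4
     * - call (fn pointer)     8
     * ------------------------
     * remaining              28  ->  28 / 8 = 3 class_serial slots
     */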

Calculating -------------------------------------
                           ours         2.7         2.6
vm2_poly_same_method     2.339M      1.744M      1.369M i/s - 6.000M times in 2.565086s 3.441329s 4.381386s

Comparison:
             vm2_poly_same_method
                ours:   2339103.0 i/s
                 2.7:   1743512.3 i/s - 1.34x  slower
                 2.6:   1369429.8 i/s - 1.71x  slower
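
The LRU behaviour is implemented by vm_cache_check_for_class_serial()
in the vm_insnhelper.c hunk below.  As a simplified sketch (not the
exact code; lru_hit_p is a stand-in name and it leans on surrounding
ruby internals such as numberof()), the lookup works like this:

    static inline bool
    lru_hit_p(struct rb_call_cache *cc, rb_serial_t serial)
    {
        for (int i = 0; i < numberof(cc->class_serial); i++) {
            rb_serial_t s = cc->class_serial[i];
            if (!s) break;              /* empty slot: serial not cached */
            if (s != serial) continue;  /* keep scanning */
            if (i == 0) return true;    /* already the most recent entry */
            /* promote the hit entry to slot 0 (the LRU update) */
            for (; i > 0; i--) {
                cc->class_serial[i] = cc->class_serial[i - 1];
            }
            cc->class_serial[0] = s;
            return true;                /* real code also clears cc->aux */
        }
        return false;                   /* miss */
    }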

diff --git a/internal.h b/internal.h
index 703dd57..1b27df0 100644
--- a/internal.h
+++ b/internal.h
@@ -2357,10 +2357,32 @@ struct rb_execution_context_struct; https://github.com/ruby/ruby/blob/trunk/internal.h#L2357
 struct rb_control_frame_struct;
 struct rb_calling_info;
 struct rb_call_data;
+/* I have several reasons to choose 64 here:
+ *
+ * - A cache line must be a power-of-two size.
+ * - Setting this to anything less than or equal to 32 gains nothing.
+ * - I have never seen an architecture that has a 128-byte L1 cache line.
+ * - I know Intel Core and Sparc T4 at least use 64.
+ * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`.
+ *   https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h
+ */
+#define CACHELINE 64
 struct rb_call_cache {
     /* inline cache: keys */
     rb_serial_t method_state;
-    rb_serial_t class_serial;
+    rb_serial_t class_serial[
+        (CACHELINE
+         - sizeof(rb_serial_t)                                   /* method_state */
+         - sizeof(struct rb_callable_method_entry_struct *)      /* me */
+         - sizeof(struct rb_callable_method_definition_struct *) /* def */
+         - sizeof(enum method_missing_reason)                    /* aux */
+         - sizeof(VALUE (*)(                                     /* call */
+               struct rb_execution_context_struct *e,
+               struct rb_control_frame_struct *,
+               struct rb_calling_info *,
+               const struct rb_call_data *)))
+        / sizeof(rb_serial_t)
+    ];
 
     /* inline cache: values */
     const struct rb_callable_method_entry_struct *me;
@@ -2377,6 +2399,7 @@ struct rb_call_cache { https://github.com/ruby/ruby/blob/trunk/internal.h#L2399
         int inc_sp; /* used by cfunc */
     } aux;
 };
+STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE);
 struct rb_call_info {
     /* fixed at compile time */
     ID mid;
diff --git a/mjit_compile.c b/mjit_compile.c
index 27ea836..bf5143f 100644
--- a/mjit_compile.c
+++ b/mjit_compile.c
@@ -87,7 +87,7 @@ has_valid_method_type(CALL_CACHE cc) https://github.com/ruby/ruby/blob/trunk/mjit_compile.c#L87
 {
     extern bool mjit_valid_class_serial_p(rb_serial_t class_serial);
     return GET_GLOBAL_METHOD_STATE() == cc->method_state
-        && mjit_valid_class_serial_p(cc->class_serial) && cc->me;
+        && mjit_valid_class_serial_p(cc->class_serial[0]) && cc->me;
 }
 
 // Returns true if iseq can use fastpath for setup, otherwise NULL. This becomes true in the same condition
diff --git a/tool/ruby_vm/loaders/insns_def.rb b/tool/ruby_vm/loaders/insns_def.rb
index a29d13a..47e4ba2 100644
--- a/tool/ruby_vm/loaders/insns_def.rb
+++ b/tool/ruby_vm/loaders/insns_def.rb
@@ -21,7 +21,7 @@ grammar = %r' https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/loaders/insns_def.rb#L21
     (?<keyword>  typedef | extern | static | auto | register |
                  struct  | union  | enum                           ){0}
     (?<C>        (?: \g<block> | [^{}]+ )*                         ){0}
-    (?<block>    \{ \g<ws>* ^ \g<C> $ \g<ws>* \}                   ){0}
+    (?<block>    \{ \g<ws>*   \g<C>   \g<ws>* \}                   ){0}
     (?<ws>       \g<comment> | \s                                  ){0}
     (?<ident>    [_a-zA-Z] [0-9_a-zA-Z]*                           ){0}
     (?<type>     (?: \g<keyword> \g<ws>+ )* \g<ident>              ){0}
diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb
index 95e7846..ec8eec5 100644
--- a/tool/ruby_vm/views/_mjit_compile_send.erb
+++ b/tool/ruby_vm/views/_mjit_compile_send.erb
@@ -36,7 +36,7 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L36
 
 % # JIT: Invalidate call cache if it requires vm_search_method. This allows to inline some of following things.
             fprintf(f, "    if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc_copy->method_state);
-            fprintf(f, "        RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial);
+            fprintf(f, "        RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial[0]);
             fprintf(f, "        reg_cfp->pc = original_body_iseq + %d;\n", pos);
             fprintf(f, "        reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
             fprintf(f, "        goto send_cancel;\n");
diff --git a/vm_eval.c b/vm_eval.c
index 230b96b..4c73d73 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -47,7 +47,7 @@ rb_vm_call0(rb_execution_context_t *ec, VALUE recv, ID id, int argc, const VALUE https://github.com/ruby/ruby/blob/trunk/vm_eval.c#L47
 {
     struct rb_calling_info calling = { Qundef, recv, argc, kw_splat, };
     struct rb_call_info ci = { id, (kw_splat ? VM_CALL_KW_SPLAT : 0), argc, };
-    struct rb_call_cache cc = { 0, 0, me, me->def, vm_call_general, { 0, }, };
+    struct rb_call_cache cc = { 0, { 0, }, me, me->def, vm_call_general, { 0, }, };
     struct rb_call_data cd = { cc, ci, };
     return vm_call0_body(ec, &calling, &cd, argv);
 }
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 5e1cfcc..f8be5f6 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1422,16 +1422,58 @@ rb_vm_search_method_slowpath(struct rb_call_data *cd, VALUE klass) https://github.com/ruby/ruby/blob/trunk/vm_insnhelper.c#L1422
     struct rb_call_cache *cc = &cd->cc;
     const rb_callable_method_entry_t *me =
         rb_callable_method_entry(klass, ci->mid);
-    *cc = (struct rb_call_cache) {
+    struct rb_call_cache buf = {
         GET_GLOBAL_METHOD_STATE(),
-        RCLASS_SERIAL(klass),
+        { RCLASS_SERIAL(klass) },
         me,
         me ? me->def : NULL,
         calccall(cd, me),
     };
+    if (buf.call != vm_call_general) {
+        for (int i = 0; i < numberof(cc->class_serial) - 1; i++) {
+            buf.class_serial[i + 1] = cc->class_serial[i];
+        }
+    }
+    MEMCPY(cc, &buf, struct rb_call_cache, 1);
     VM_ASSERT(callable_method_entry_p(cc->me));
 }
 
+static inline bool
+vm_cache_check_for_class_serial(struct rb_call_cache *cc, rb_serial_t class_serial)
+{
+    int i;
+    rb_serial_t j;
+
+    for (i = 0; i < numberof(cc->class_serial); i++) {
+        j = cc->class_serial[i];
+
+        if (! j) {
+            break;
+        }
+        else if (j != class_serial) {
+            continue;
+        }
+        else if (! i) {
+            return true;
+        }
+        else {
+            goto hit;
+        }
+    }
+
+    RB_DEBUG_COUNTER_INC(mc_class_serial_miss);
+    return false;
+
+  hit:
+    for (; i > 0; i--) {
+        cc->class_serial[i] = cc->class_serial[i - 1];
+    }
+
+    cc->class_serial[0] = j;
+    MEMZERO(&cc->aux, cc->aux, 1); /* cc->call is valid, but cc->aux might not. */
+    return true;
+}
+
 static void
 vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass)
 {
@@ -1440,8 +1482,7 @@ vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass) https://github.com/ruby/ruby/blob/trunk/vm_insnhelper.c#L1482
 #if OPT_INLINE_METHOD_CACHE
     if (LIKELY(RB_DEBUG_COUNTER_INC_UNLESS(mc_global_state_miss,
 					   GET_GLOBAL_METHOD_STATE() == cc->method_state) &&
-	       RB_DEBUG_COUNTER_INC_UNLESS(mc_class_serial_miss,
-					   RCLASS_SERIAL(klass) == cc->class_serial))) {
+               vm_cache_check_for_class_serial(cc, RCLASS_SERIAL(klass)))) {
 	/* cache hit! */
 	VM_ASSERT(cc->call != NULL);
 	RB_DEBUG_COUNTER_INC(mc_inline_hit);
@@ -1605,24 +1646,16 @@ opt_eql_func(VALUE recv, VALUE obj, CALL_DATA cd) https://github.com/ruby/ruby/blob/trunk/vm_insnhelper.c#L1646
 VALUE
 rb_equal_opt(VALUE obj1, VALUE obj2)
 {
-    struct rb_call_data cd;
+    struct rb_call_data cd = { .ci = { .mid = idEq, }, };
 
-    cd.ci.mid = idEq;
-    cd.cc.method_state = 0;
-    cd.cc.class_serial = 0;
-    cd.cc.me = NULL;
     return opt_eq_func(obj1, obj2, &cd);
 }
 
 VALUE
 rb_eql_opt(VALUE obj1, VALUE obj2)
 {
-    struct rb_call_data cd;
+    struct rb_call_data cd = { .ci = { .mid = idEqlP, }, };
 
-    cd.ci.mid = idEqlP;
-    cd.cc.method_state = 0;
-    cd.cc.class_serial = 0;
-    cd.cc.me = NULL;
     return opt_eql_func(obj1, obj2, &cd);
 }
 
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/
