[前][次][番号順一覧][スレッド一覧]

ruby-changes:58387

From: Alan <ko1@a...>
Date: Thu, 24 Oct 2019 18:04:02 +0900 (JST)
Subject: [ruby-changes:58387] 89e7997622 (master): Combine call info and cache to speed up method invocation

https://git.ruby-lang.org/ruby.git/commit/?id=89e7997622

From 89e7997622038f82115f34dbb4ea382e02bed163 Mon Sep 17 00:00:00 2001
From: Alan Wu <XrXr@u...>
Date: Tue, 30 Jul 2019 21:36:05 -0400
Subject: Combine call info and cache to speed up method invocation

To perform a regular method call, the VM needs two structs,
`rb_call_info` and `rb_call_cache`. At the moment, we allocate these two
structures in separate buffers. In the worst case, the CPU needs to read
4 cache lines to complete a method call. Putting the two structures
together reduces the maximum number of cache line reads to 2.

Combining the structures also saves 8 bytes per call site as the current
layout uses separate two pointers for the call info and the call cache.
This saves about 2 MiB on Discourse.

This change improves the Optcarrot benchmark at least 3%. For more
details, see attached bugs.ruby-lang.org ticket.

Complications:
 - A new instruction attribute `comptime_sp_inc` is introduced to
 calculate SP increase at compile time without using call caches. At
 compile time, a `TS_CALLDATA` operand points to a call info struct, but
 at runtime, the same operand points to a call data struct. Instruction
 that explicitly define `sp_inc` also need to define `comptime_sp_inc`.
 - MJIT code for copying call cache becomes slightly more complicated.
 - This changes the bytecode format, which might break existing tools.

[Misc #16258]

diff --git a/common.mk b/common.mk
index d44a62e..5c580b1 100644
--- a/common.mk
+++ b/common.mk
@@ -974,7 +974,7 @@ $(srcs_vpath)insns.inc: $(srcdir)/tool/ruby_vm/views/insns.inc.erb $(inc_common_ https://github.com/ruby/ruby/blob/trunk/common.mk#L974
 $(srcs_vpath)insns_info.inc: $(srcdir)/tool/ruby_vm/views/insns_info.inc.erb $(inc_common_headers) \
   $(srcdir)/tool/ruby_vm/views/_insn_type_chars.erb $(srcdir)/tool/ruby_vm/views/_insn_name_info.erb \
   $(srcdir)/tool/ruby_vm/views/_insn_len_info.erb $(srcdir)/tool/ruby_vm/views/_insn_operand_info.erb \
-  $(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_insn_stack_increase.erb \
+  $(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_comptime_insn_stack_increase.erb \
   $(srcdir)/tool/ruby_vm/views/_insn_sp_pc_dependency.erb
 $(srcs_vpath)vmtc.inc: $(srcdir)/tool/ruby_vm/views/vmtc.inc.erb $(inc_common_headers)
 $(srcs_vpath)vm.inc: $(srcdir)/tool/ruby_vm/views/vm.inc.erb $(inc_common_headers) \
diff --git a/compile.c b/compile.c
index 0ca8757..d0f4ce4 100644
--- a/compile.c
+++ b/compile.c
@@ -546,6 +546,31 @@ verify_list(ISEQ_ARG_DECLARE const char *info, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L546
 #define verify_list(info, anchor) verify_list(iseq, (info), (anchor))
 #endif
 
+static void
+verify_call_cache(rb_iseq_t *iseq)
+{
+    return; /* comment out to enable */
+
+    VALUE *original = rb_iseq_original_iseq(iseq);
+    size_t i = 0;
+    while (i < iseq->body->iseq_size) {
+        VALUE insn = original[i];
+        const char *types = insn_op_types(insn);
+
+        for (int j=0; types[j]; j++) {
+            if (types[j] == TS_CALLDATA) {
+                struct rb_call_cache cc;
+                struct rb_call_data *cd = (struct rb_call_data *)original[i+j+1];
+                MEMZERO(&cc, cc, 1);
+                if (memcmp(&cc, &cd->cc, sizeof(cc))) {
+                    rb_bug("call cache not zero for fresh iseq");
+                }
+            }
+        }
+        i += insn_len(insn);
+    }
+}
+
 /*
  * elem1, elem2 => elem1, elem2, elem
  */
@@ -1178,11 +1203,10 @@ new_callinfo(rb_iseq_t *iseq, ID mid, int argc, unsigned int flag, struct rb_cal https://github.com/ruby/ruby/blob/trunk/compile.c#L1203
 static INSN *
 new_insn_send(rb_iseq_t *iseq, int line_no, ID id, VALUE argc, const rb_iseq_t *blockiseq, VALUE flag, struct rb_call_info_kw_arg *keywords)
 {
-    VALUE *operands = compile_data_alloc2(iseq, sizeof(VALUE), 3);
+    VALUE *operands = compile_data_alloc2(iseq, sizeof(VALUE), 2);
     operands[0] = (VALUE)new_callinfo(iseq, id, FIX2INT(argc), FIX2INT(flag), keywords, blockiseq != NULL);
-    operands[1] = Qfalse; /* cache */
-    operands[2] = (VALUE)blockiseq;
-    return new_insn_core(iseq, line_no, BIN(send), 3, operands);
+    operands[1] = (VALUE)blockiseq;
+    return new_insn_core(iseq, line_no, BIN(send), 2, operands);
 }
 
 static rb_iseq_t *
@@ -1358,6 +1382,7 @@ iseq_setup(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L1382
 	VALUE str = rb_iseq_disasm(iseq);
 	printf("%s\n", StringValueCStr(str));
     }
+    verify_call_cache(iseq);
     debugs("[compile step: finish]\n");
 
     return COMPILE_OK;
@@ -2090,13 +2115,10 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2115
     insns_info = ALLOC_N(struct iseq_insn_info_entry, insn_num);
     positions = ALLOC_N(unsigned int, insn_num);
     body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, body->is_size);
-    body->ci_entries =
-        rb_xmalloc_mul_add_mul(
-            sizeof(struct rb_call_info), body->ci_size,
-            sizeof(struct rb_call_info_with_kwarg), body->ci_kw_size);
-    MEMZERO(body->ci_entries + body->ci_size, struct rb_call_info_with_kwarg,  body->ci_kw_size); /* need to clear ci_kw entries */
-    body->cc_entries = ZALLOC_N(struct rb_call_cache, body->ci_size + body->ci_kw_size);
-
+    body->call_data =
+        rb_xcalloc_mul_add_mul(
+            sizeof(struct rb_call_data), body->ci_size,
+            sizeof(struct rb_kwarg_call_data), body->ci_kw_size);
     ISEQ_COMPILE_DATA(iseq)->ci_index = ISEQ_COMPILE_DATA(iseq)->ci_kw_index = 0;
 
     list = FIRST_ELEMENT(anchor);
@@ -2180,33 +2202,27 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2202
 			    generated_iseq[code_index + 1 + j] = (VALUE)ic;
 			    break;
 			}
-		      case TS_CALLINFO: /* call info */
-			{
-			    struct rb_call_info *base_ci = (struct rb_call_info *)operands[j];
-			    struct rb_call_info *ci;
-
-			    if (base_ci->flag & VM_CALL_KWARG) {
-				struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&body->ci_entries[body->ci_size];
-				struct rb_call_info_with_kwarg *ci_kw = &ci_kw_entries[ISEQ_COMPILE_DATA(iseq)->ci_kw_index++];
-				*ci_kw = *((struct rb_call_info_with_kwarg *)base_ci);
-				ci = (struct rb_call_info *)ci_kw;
-				assert(ISEQ_COMPILE_DATA(iseq)->ci_kw_index <= body->ci_kw_size);
-			    }
-			    else {
-				ci = &body->ci_entries[ISEQ_COMPILE_DATA(iseq)->ci_index++];
-				*ci = *base_ci;
-				assert(ISEQ_COMPILE_DATA(iseq)->ci_index <= body->ci_size);
-			    }
+                        case TS_CALLDATA:
+                        {
+                            struct rb_call_info *source_ci = (struct rb_call_info *)operands[j];
+                            struct rb_call_data *cd;
+
+                            if (source_ci->flag & VM_CALL_KWARG) {
+                                struct rb_kwarg_call_data *kw_calls = (struct rb_kwarg_call_data *)&body->call_data[body->ci_size];
+                                struct rb_kwarg_call_data *cd_kw = &kw_calls[ISEQ_COMPILE_DATA(iseq)->ci_kw_index++];
+                                cd_kw->ci_kw = *((struct rb_call_info_with_kwarg *)source_ci);
+                                cd = (struct rb_call_data *)cd_kw;
+                                assert(ISEQ_COMPILE_DATA(iseq)->ci_kw_index <= body->ci_kw_size);
+                            }
+                            else {
+                                cd = &body->call_data[ISEQ_COMPILE_DATA(iseq)->ci_index++];
+                                cd->ci = *source_ci;
+                                assert(ISEQ_COMPILE_DATA(iseq)->ci_index <= body->ci_size);
+                            }
 
-			    generated_iseq[code_index + 1 + j] = (VALUE)ci;
-			    break;
-			}
-		      case TS_CALLCACHE:
-			{
-			    struct rb_call_cache *cc = &body->cc_entries[ISEQ_COMPILE_DATA(iseq)->ci_index + ISEQ_COMPILE_DATA(iseq)->ci_kw_index - 1];
-			    generated_iseq[code_index + 1 + j] = (VALUE)cc;
-			    break;
-			}
+                            generated_iseq[code_index + 1 + j] = (VALUE)cd;
+                            break;
+                        }
 		      case TS_ID: /* ID */
 			generated_iseq[code_index + 1 + j] = SYM2ID(operands[j]);
 			break;
@@ -2530,7 +2546,7 @@ remove_unreachable_chunk(rb_iseq_t *iseq, LINK_ELEMENT *i) https://github.com/ruby/ruby/blob/trunk/compile.c#L2546
 		  case TS_OFFSET:
 		    unref_destination((INSN *)i, pos);
 		    break;
-		  case TS_CALLINFO:
+                  case TS_CALLDATA:
 		    if (((struct rb_call_info *)OPERAND_AT(i, pos))->flag & VM_CALL_KWARG)
 			--(body->ci_kw_size);
 		    else
@@ -3147,9 +3163,9 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal https://github.com/ruby/ruby/blob/trunk/compile.c#L3163
 	}
 
 	if (piobj) {
-	    struct rb_call_info *ci = (struct rb_call_info *)piobj->operands[0];
+            struct rb_call_info *ci = (struct rb_call_info *)OPERAND_AT(piobj, 0);
 	    if (IS_INSN_ID(piobj, send) || IS_INSN_ID(piobj, invokesuper)) {
-		if (piobj->operands[2] == 0) { /* no blockiseq */
+                if (OPERAND_AT(piobj, 1) == 0) { /* no blockiseq */
 		    ci->flag |= VM_CALL_TAILCALL;
 		}
 	    }
@@ -3208,12 +3224,10 @@ insn_set_specialized_instruction(rb_iseq_t *iseq, INSN *iobj, int insn_id) https://github.com/ruby/ruby/blob/trunk/compile.c#L3224
 
     if (insn_id == BIN(opt_neq)) {
 	VALUE *old_operands = iobj->operands;
-	iobj->operand_size = 4;
+        iobj->operand_size = 2;
         iobj->operands = compile_data_alloc2(iseq, iobj->operand_size, sizeof(VALUE));
-	iobj->operands[0] = (VALUE)new_callinfo(iseq, idEq, 1, 0, NULL, FALSE);
-	iobj->operands[1] = Qfalse; /* CALL_CACHE */
-	iobj->operands[2] = old_operands[0];
-	iobj->operands[3] = Qfalse; /* CALL_CACHE */
+ (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]