ruby-changes:58387
From: Alan <ko1@a...>
Date: Thu, 24 Oct 2019 18:04:02 +0900 (JST)
Subject: [ruby-changes:58387] 89e7997622 (master): Combine call info and cache to speed up method invocation
https://git.ruby-lang.org/ruby.git/commit/?id=89e7997622 From 89e7997622038f82115f34dbb4ea382e02bed163 Mon Sep 17 00:00:00 2001 From: Alan Wu <XrXr@u...> Date: Tue, 30 Jul 2019 21:36:05 -0400 Subject: Combine call info and cache to speed up method invocation To perform a regular method call, the VM needs two structs, `rb_call_info` and `rb_call_cache`. At the moment, we allocate these two structures in separate buffers. In the worst case, the CPU needs to read 4 cache lines to complete a method call. Putting the two structures together reduces the maximum number of cache line reads to 2. Combining the structures also saves 8 bytes per call site as the current layout uses separate two pointers for the call info and the call cache. This saves about 2 MiB on Discourse. This change improves the Optcarrot benchmark at least 3%. For more details, see attached bugs.ruby-lang.org ticket. Complications: - A new instruction attribute `comptime_sp_inc` is introduced to calculate SP increase at compile time without using call caches. At compile time, a `TS_CALLDATA` operand points to a call info struct, but at runtime, the same operand points to a call data struct. Instruction that explicitly define `sp_inc` also need to define `comptime_sp_inc`. - MJIT code for copying call cache becomes slightly more complicated. - This changes the bytecode format, which might break existing tools. [Misc #16258] diff --git a/common.mk b/common.mk index d44a62e..5c580b1 100644 --- a/common.mk +++ b/common.mk @@ -974,7 +974,7 @@ $(srcs_vpath)insns.inc: $(srcdir)/tool/ruby_vm/views/insns.inc.erb $(inc_common_ https://github.com/ruby/ruby/blob/trunk/common.mk#L974 $(srcs_vpath)insns_info.inc: $(srcdir)/tool/ruby_vm/views/insns_info.inc.erb $(inc_common_headers) \ $(srcdir)/tool/ruby_vm/views/_insn_type_chars.erb $(srcdir)/tool/ruby_vm/views/_insn_name_info.erb \ $(srcdir)/tool/ruby_vm/views/_insn_len_info.erb $(srcdir)/tool/ruby_vm/views/_insn_operand_info.erb \ - $(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_insn_stack_increase.erb \ + $(srcdir)/tool/ruby_vm/views/_attributes.erb $(srcdir)/tool/ruby_vm/views/_comptime_insn_stack_increase.erb \ $(srcdir)/tool/ruby_vm/views/_insn_sp_pc_dependency.erb $(srcs_vpath)vmtc.inc: $(srcdir)/tool/ruby_vm/views/vmtc.inc.erb $(inc_common_headers) $(srcs_vpath)vm.inc: $(srcdir)/tool/ruby_vm/views/vm.inc.erb $(inc_common_headers) \ diff --git a/compile.c b/compile.c index 0ca8757..d0f4ce4 100644 --- a/compile.c +++ b/compile.c @@ -546,6 +546,31 @@ verify_list(ISEQ_ARG_DECLARE const char *info, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L546 #define verify_list(info, anchor) verify_list(iseq, (info), (anchor)) #endif +static void +verify_call_cache(rb_iseq_t *iseq) +{ + return; /* comment out to enable */ + + VALUE *original = rb_iseq_original_iseq(iseq); + size_t i = 0; + while (i < iseq->body->iseq_size) { + VALUE insn = original[i]; + const char *types = insn_op_types(insn); + + for (int j=0; types[j]; j++) { + if (types[j] == TS_CALLDATA) { + struct rb_call_cache cc; + struct rb_call_data *cd = (struct rb_call_data *)original[i+j+1]; + MEMZERO(&cc, cc, 1); + if (memcmp(&cc, &cd->cc, sizeof(cc))) { + rb_bug("call cache not zero for fresh iseq"); + } + } + } + i += insn_len(insn); + } +} + /* * elem1, elem2 => elem1, elem2, elem */ @@ -1178,11 +1203,10 @@ new_callinfo(rb_iseq_t *iseq, ID mid, int argc, unsigned int flag, struct rb_cal https://github.com/ruby/ruby/blob/trunk/compile.c#L1203 static INSN * new_insn_send(rb_iseq_t *iseq, int line_no, ID id, VALUE argc, const rb_iseq_t *blockiseq, VALUE flag, struct rb_call_info_kw_arg *keywords) { - VALUE *operands = compile_data_alloc2(iseq, sizeof(VALUE), 3); + VALUE *operands = compile_data_alloc2(iseq, sizeof(VALUE), 2); operands[0] = (VALUE)new_callinfo(iseq, id, FIX2INT(argc), FIX2INT(flag), keywords, blockiseq != NULL); - operands[1] = Qfalse; /* cache */ - operands[2] = (VALUE)blockiseq; - return new_insn_core(iseq, line_no, BIN(send), 3, operands); + operands[1] = (VALUE)blockiseq; + return new_insn_core(iseq, line_no, BIN(send), 2, operands); } static rb_iseq_t * @@ -1358,6 +1382,7 @@ iseq_setup(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L1382 VALUE str = rb_iseq_disasm(iseq); printf("%s\n", StringValueCStr(str)); } + verify_call_cache(iseq); debugs("[compile step: finish]\n"); return COMPILE_OK; @@ -2090,13 +2115,10 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2115 insns_info = ALLOC_N(struct iseq_insn_info_entry, insn_num); positions = ALLOC_N(unsigned int, insn_num); body->is_entries = ZALLOC_N(union iseq_inline_storage_entry, body->is_size); - body->ci_entries = - rb_xmalloc_mul_add_mul( - sizeof(struct rb_call_info), body->ci_size, - sizeof(struct rb_call_info_with_kwarg), body->ci_kw_size); - MEMZERO(body->ci_entries + body->ci_size, struct rb_call_info_with_kwarg, body->ci_kw_size); /* need to clear ci_kw entries */ - body->cc_entries = ZALLOC_N(struct rb_call_cache, body->ci_size + body->ci_kw_size); - + body->call_data = + rb_xcalloc_mul_add_mul( + sizeof(struct rb_call_data), body->ci_size, + sizeof(struct rb_kwarg_call_data), body->ci_kw_size); ISEQ_COMPILE_DATA(iseq)->ci_index = ISEQ_COMPILE_DATA(iseq)->ci_kw_index = 0; list = FIRST_ELEMENT(anchor); @@ -2180,33 +2202,27 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2202 generated_iseq[code_index + 1 + j] = (VALUE)ic; break; } - case TS_CALLINFO: /* call info */ - { - struct rb_call_info *base_ci = (struct rb_call_info *)operands[j]; - struct rb_call_info *ci; - - if (base_ci->flag & VM_CALL_KWARG) { - struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&body->ci_entries[body->ci_size]; - struct rb_call_info_with_kwarg *ci_kw = &ci_kw_entries[ISEQ_COMPILE_DATA(iseq)->ci_kw_index++]; - *ci_kw = *((struct rb_call_info_with_kwarg *)base_ci); - ci = (struct rb_call_info *)ci_kw; - assert(ISEQ_COMPILE_DATA(iseq)->ci_kw_index <= body->ci_kw_size); - } - else { - ci = &body->ci_entries[ISEQ_COMPILE_DATA(iseq)->ci_index++]; - *ci = *base_ci; - assert(ISEQ_COMPILE_DATA(iseq)->ci_index <= body->ci_size); - } + case TS_CALLDATA: + { + struct rb_call_info *source_ci = (struct rb_call_info *)operands[j]; + struct rb_call_data *cd; + + if (source_ci->flag & VM_CALL_KWARG) { + struct rb_kwarg_call_data *kw_calls = (struct rb_kwarg_call_data *)&body->call_data[body->ci_size]; + struct rb_kwarg_call_data *cd_kw = &kw_calls[ISEQ_COMPILE_DATA(iseq)->ci_kw_index++]; + cd_kw->ci_kw = *((struct rb_call_info_with_kwarg *)source_ci); + cd = (struct rb_call_data *)cd_kw; + assert(ISEQ_COMPILE_DATA(iseq)->ci_kw_index <= body->ci_kw_size); + } + else { + cd = &body->call_data[ISEQ_COMPILE_DATA(iseq)->ci_index++]; + cd->ci = *source_ci; + assert(ISEQ_COMPILE_DATA(iseq)->ci_index <= body->ci_size); + } - generated_iseq[code_index + 1 + j] = (VALUE)ci; - break; - } - case TS_CALLCACHE: - { - struct rb_call_cache *cc = &body->cc_entries[ISEQ_COMPILE_DATA(iseq)->ci_index + ISEQ_COMPILE_DATA(iseq)->ci_kw_index - 1]; - generated_iseq[code_index + 1 + j] = (VALUE)cc; - break; - } + generated_iseq[code_index + 1 + j] = (VALUE)cd; + break; + } case TS_ID: /* ID */ generated_iseq[code_index + 1 + j] = SYM2ID(operands[j]); break; @@ -2530,7 +2546,7 @@ remove_unreachable_chunk(rb_iseq_t *iseq, LINK_ELEMENT *i) https://github.com/ruby/ruby/blob/trunk/compile.c#L2546 case TS_OFFSET: unref_destination((INSN *)i, pos); break; - case TS_CALLINFO: + case TS_CALLDATA: if (((struct rb_call_info *)OPERAND_AT(i, pos))->flag & VM_CALL_KWARG) --(body->ci_kw_size); else @@ -3147,9 +3163,9 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal https://github.com/ruby/ruby/blob/trunk/compile.c#L3163 } if (piobj) { - struct rb_call_info *ci = (struct rb_call_info *)piobj->operands[0]; + struct rb_call_info *ci = (struct rb_call_info *)OPERAND_AT(piobj, 0); if (IS_INSN_ID(piobj, send) || IS_INSN_ID(piobj, invokesuper)) { - if (piobj->operands[2] == 0) { /* no blockiseq */ + if (OPERAND_AT(piobj, 1) == 0) { /* no blockiseq */ ci->flag |= VM_CALL_TAILCALL; } } @@ -3208,12 +3224,10 @@ insn_set_specialized_instruction(rb_iseq_t *iseq, INSN *iobj, int insn_id) https://github.com/ruby/ruby/blob/trunk/compile.c#L3224 if (insn_id == BIN(opt_neq)) { VALUE *old_operands = iobj->operands; - iobj->operand_size = 4; + iobj->operand_size = 2; iobj->operands = compile_data_alloc2(iseq, iobj->operand_size, sizeof(VALUE)); - iobj->operands[0] = (VALUE)new_callinfo(iseq, idEq, 1, 0, NULL, FALSE); - iobj->operands[1] = Qfalse; /* CALL_CACHE */ - iobj->operands[2] = old_operands[0]; - iobj->operands[3] = Qfalse; /* CALL_CACHE */ + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/