ruby-changes:68885
From: Alan <ko1@a...>
Date: Thu, 21 Oct 2021 08:15:03 +0900 (JST)
Subject: [ruby-changes:68885] b626dd7211 (master): YJIT: Fancier opt_getinlinecache
https://git.ruby-lang.org/ruby.git/commit/?id=b626dd7211 From b626dd7211b9d45f1dab6f82057445781f34f20f Mon Sep 17 00:00:00 2001 From: Alan Wu <XrXr@u...> Date: Wed, 24 Mar 2021 18:07:26 -0400 Subject: YJIT: Fancier opt_getinlinecache Make sure `opt_getinlinecache` is in a block all on its own, and invalidate it from the interpreter when `opt_setinlinecache` runs. It will recompile with a filled cache the second time around. This lets YJIT run well when the IC for a constant is cold. --- compile.c | 34 +++++++++++++++++-- vm_core.h | 3 ++ vm_insnhelper.c | 3 ++ yjit.h | 5 ++- yjit_codegen.c | 102 +++++++++++++++++++++++++++++--------------------------- yjit_core.c | 19 ++++++----- yjit_core.h | 1 + yjit_iface.c | 44 ++++++++++++++++++++---- yjit_iface.h | 8 ++--- 9 files changed, 146 insertions(+), 73 deletions(-) diff --git a/compile.c b/compile.c index 1e88dc242d..b11650c885 100644 --- a/compile.c +++ b/compile.c @@ -2259,6 +2259,7 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2259 VALUE *generated_iseq; rb_event_flag_t events = 0; long data = 0; + long getinlinecache_idx = -1; int insn_num, code_index, insns_info_index, sp = 0; int stack_max = fix_sp_depth(iseq, anchor); @@ -2362,6 +2363,11 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2363 types = insn_op_types(insn); len = insn_len(insn); + if (insn == BIN(opt_getinlinecache)) { + assert(getinlinecache_idx < 0 && "one get per set, no nesting"); + getinlinecache_idx = code_index; + } + for (j = 0; types[j]; j++) { char type = types[j]; /* printf("--> [%c - (%d-%d)]\n", type, k, j); */ @@ -2419,6 +2425,13 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) https://github.com/ruby/ruby/blob/trunk/compile.c#L2425 } generated_iseq[code_index + 1 + j] = (VALUE)ic; FL_SET(iseqv, ISEQ_MARKABLE_ISEQ); + + if (insn == BIN(opt_setinlinecache) && type == TS_IC) { 
assert(getinlinecache_idx >= 0); + // Store index to the matching opt_getinlinecache on the IC for YJIT + ic->get_insn_idx = (unsigned)getinlinecache_idx; + getinlinecache_idx = -1; + } break; } case TS_CALLDATA: @@ -11107,6 +11120,7 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod https://github.com/ruby/ruby/blob/trunk/compile.c#L11120 unsigned int code_index; ibf_offset_t reading_pos = bytecode_offset; VALUE *code = ALLOC_N(VALUE, iseq_size); + long getinlinecache_idx = -1; struct rb_iseq_constant_body *load_body = iseq->body; struct rb_call_data *cd_entries = load_body->call_data; @@ -11114,13 +11128,22 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod https://github.com/ruby/ruby/blob/trunk/compile.c#L11128 for (code_index=0; code_index<iseq_size;) { /* opcode */ - const VALUE insn = code[code_index++] = ibf_load_small_value(load, &reading_pos); + const VALUE insn = code[code_index] = ibf_load_small_value(load, &reading_pos); const char *types = insn_op_types(insn); int op_index; + if (insn == BIN(opt_getinlinecache)) { + assert(getinlinecache_idx < 0 && "one get per set, no nesting"); + getinlinecache_idx = code_index; + } + + code_index++; + /* operands */ for (op_index=0; types[op_index]; op_index++, code_index++) { - switch (types[op_index]) { + char type = types[op_index]; + switch (type) { + case TS_CDHASH: case TS_VALUE: { VALUE op = ibf_load_small_value(load, &reading_pos); @@ -11168,6 +11191,13 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod https://github.com/ruby/ruby/blob/trunk/compile.c#L11191 { VALUE op = ibf_load_small_value(load, &reading_pos); code[code_index] = (VALUE)&is_entries[op]; + + if (insn == BIN(opt_setinlinecache) && type == TS_IC) { + assert(getinlinecache_idx >= 0); + // Store index to the matching opt_getinlinecache on the IC for YJIT + is_entries[op].ic_cache.get_insn_idx = (unsigned)getinlinecache_idx; + getinlinecache_idx = 
-1; + } } FL_SET(iseqv, ISEQ_MARKABLE_ISEQ); break; diff --git a/vm_core.h b/vm_core.h index cd8a01d1ca..6fd24e962f 100644 --- a/vm_core.h +++ b/vm_core.h @@ -236,6 +236,9 @@ STATIC_ASSERT(sizeof_iseq_inline_constant_cache_entry, https://github.com/ruby/ruby/blob/trunk/vm_core.h#L236 struct iseq_inline_constant_cache { struct iseq_inline_constant_cache_entry *entry; + // For YJIT: the index to the opt_getinlinecache instruction in the same iseq. + // It's set during compile time and constant once set. + unsigned get_insn_idx; }; struct iseq_inline_iv_cache_entry { diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 16f46e50d3..00b352df3d 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -4743,6 +4743,9 @@ vm_ic_update(const rb_iseq_t *iseq, IC ic, VALUE val, const VALUE *reg_ep) https://github.com/ruby/ruby/blob/trunk/vm_insnhelper.c#L4743 if (rb_ractor_shareable_p(val)) ice->flags |= IMEMO_CONST_CACHE_SHAREABLE; ruby_vm_const_missing_count = 0; RB_OBJ_WRITE(iseq, &ic->entry, ice); +#ifndef MJIT_HEADER + yjit_constant_ic_update(iseq, ic); +#endif } static VALUE diff --git a/yjit.h b/yjit.h index cfb25a529d..00ed486054 100644 --- a/yjit.h +++ b/yjit.h @@ -5,9 +5,7 @@ https://github.com/ruby/ruby/blob/trunk/yjit.h#L5 #ifndef YJIT_H #define YJIT_H 1 -#include "stddef.h" -#include "stdint.h" -#include "stdbool.h" +#include "vm_core.h" #include "method.h" #ifdef _WIN32 @@ -61,5 +59,6 @@ void rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body); https://github.com/ruby/ruby/blob/trunk/yjit.h#L59 void rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body); void rb_yjit_iseq_free(const struct rb_iseq_constant_body *body); void rb_yjit_before_ractor_spawn(void); +void yjit_constant_ic_update(const rb_iseq_t *iseq, IC ic); #endif // #ifndef YJIT_H diff --git a/yjit_codegen.c b/yjit_codegen.c index 7ff59b94a1..ce9e56a157 100644 --- a/yjit_codegen.c +++ b/yjit_codegen.c @@ -43,7 +43,7 @@ jit_print_loc(jitstate_t* jit, const char* msg) 
https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L43 static int jit_get_opcode(jitstate_t* jit) { - return opcode_at_pc(jit->iseq, jit->pc); + return yjit_opcode_at_pc(jit->iseq, jit->pc); } // Get the index of the next instruction @@ -147,7 +147,7 @@ yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L147 // Write back the old instruction at the exit PC // Otherwise the interpreter may jump right back to the // JITted code we're trying to exit - int exit_opcode = opcode_at_pc(jit->iseq, exit_pc); + int exit_opcode = yjit_opcode_at_pc(jit->iseq, exit_pc); void* handler_addr = (void*)handler_table[exit_opcode]; mov(cb, REG0, const_ptr_opnd(exit_pc)); mov(cb, REG1, const_ptr_opnd(handler_addr)); @@ -255,9 +255,8 @@ yjit_entry_prologue(void) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L255 return code_ptr; } -/* -Generate code to check for interrupts and take a side-exit -*/ + +// Generate code to check for interrupts and take a side-exit static void yjit_check_ints(codeblock_t* cb, uint8_t* side_exit) { @@ -269,17 +268,36 @@ yjit_check_ints(codeblock_t* cb, uint8_t* side_exit) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L268 jnz_ptr(cb, side_exit); } -/* -Compile a sequence of bytecode instructions for a given basic block version -*/ +// Generate a stubbed unconditional jump to the next bytecode instruction. +// Blocks that are part of a guard chain can use this to share the same successor. +static void +jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context) +{ + // Reset the depth since in current usages we only ever jump to to + // chain_depth > 0 from the same instruction. 
+ ctx_t reset_depth = *current_context; + reset_depth.chain_depth = 0; + + blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) }; + + // Generate the jump instruction + gen_direct_jump( + &reset_depth, + jump_block + ); +} + + +// Compile a sequence of bytecode instructions for a given basic block version void -yjit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec) +yjit_gen_block(ctx_t *ctx, block_t *block, rb_execution_context_t *ec) { RUBY_ASSERT(cb != NULL); RUBY_ASSERT(block != NULL); const rb_iseq_t *iseq = block->blockid.iseq; uint32_t insn_idx = block->blockid.idx; + const uint32_t starting_insn_idx = insn_idx; // NOTE: if we are ever deployed in production, we // should probably just log an error and return NULL here, @@ -305,13 +323,21 @@ yjit_gen_block(ctx_t* ctx, block_t* block, rb_execution_context_t* ec) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L323 // For each instruction to compile for (;;) { + // Get the current pc and opcode + VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx); + int opcode = yjit_opcode_at_pc(iseq, pc); + RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE); + + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/