ruby-changes:68741
From: Maxime <ko1@a...>
Date: Thu, 21 Oct 2021 08:13:15 +0900 (JST)
Subject: [ruby-changes:68741] 9d8cc01b75 (master): WIP JIT-to-JIT returns
https://git.ruby-lang.org/ruby.git/commit/?id=9d8cc01b75 From 9d8cc01b758f9385bd4c806f3daff9719e07faa0 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@s...> Date: Tue, 9 Feb 2021 16:24:06 -0500 Subject: WIP JIT-to-JIT returns --- ujit_codegen.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-------- ujit_core.c | 22 ++++++++++------ ujit_core.h | 5 +++- vm.c | 2 +- vm_core.h | 2 ++ vm_insnhelper.c | 1 + 6 files changed, 91 insertions(+), 20 deletions(-) diff --git a/ujit_codegen.c b/ujit_codegen.c index b9d2a0a7cb..4a2a4c865e 100644 --- a/ujit_codegen.c +++ b/ujit_codegen.c @@ -114,7 +114,7 @@ Compile an interpreter entry block to be inserted into an iseq https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L114 Returns `NULL` if compilation fails. */ uint8_t* -ujit_entry_prologue() +ujit_entry_prologue(void) { RUBY_ASSERT(cb != NULL); @@ -248,9 +248,9 @@ gen_dup(jitstate_t* jit, ctx_t* ctx) https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L248 x86opnd_t dup_val = ctx_stack_pop(ctx, 1); x86opnd_t loc0 = ctx_stack_push(ctx, T_NONE); x86opnd_t loc1 = ctx_stack_push(ctx, T_NONE); - mov(cb, RAX, dup_val); - mov(cb, loc0, RAX); - mov(cb, loc1, RAX); + mov(cb, REG0, dup_val); + mov(cb, loc0, REG0); + mov(cb, loc1, REG0); return true; } @@ -1191,6 +1191,23 @@ gen_opt_swb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const r https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L1191 bool rb_simple_iseq_p(const rb_iseq_t *iseq); +void +gen_return_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape) +{ + switch (shape) + { + case SHAPE_NEXT0: + case SHAPE_NEXT1: + RUBY_ASSERT(false); + break; + + case SHAPE_DEFAULT: + mov(cb, REG0, const_ptr_opnd(target0)); + mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0); + break; + } +} + static bool gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_callable_method_entry_t *cme, int32_t argc) { @@ 
-1251,13 +1268,32 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L1268 cmp(cb, klass_opnd, REG1); jne_ptr(cb, side_exit); - // Store incremented PC into current control frame in case callee raises. + // Store the updated SP on the current frame (pop arguments and receiver) + lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1))); + mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0); + + // Store the next PC in the current frame mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block)))); mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0); - // Store the updated SP on the CFP (pop arguments and receiver) - lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1))); - mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0); + // Stub so we can return to JITted code + blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) }; + + // Pop arguments and receiver in return context, push the return value + // After the return, the JIT and interpreter SP will match up + ctx_t return_ctx = *ctx; + ctx_stack_pop(&return_ctx, argc); + return_ctx.sp_offset = 0; + + // Write the JIT return address on the current frame + gen_branch( + ctx, + return_block, + &return_ctx, + return_block, + &return_ctx, + gen_return_branch + ); // Stack overflow check // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) @@ -1327,7 +1363,6 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L1363 &DEFAULT_CTX, (blockid_t){ iseq, 0 } ); - // TODO: create stub for call continuation @@ -1432,7 +1467,31 @@ gen_leave(jitstate_t* jit, ctx_t* ctx) https://github.com/ruby/ruby/blob/trunk/ujit_codegen.c#L1467 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp)); mov(cb, mem_opnd(64, REG_SP, -SIZEOF_VALUE), REG0); - // Write the post call bytes + + + + + + + // 
Load the JIT return address + mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, jit_return)); + + // If the return address is NULL, fall back to the interpreter + int FALLBACK_LABEL = cb_new_label(cb, "FALLBACK"); + cmp(cb, REG0, imm_opnd(0)); + jz(cb, FALLBACK_LABEL); + + // Jump to the JIT return address + jmp_rm(cb, REG0); + + // Fall back to the interpreter + cb_write_label(cb, FALLBACK_LABEL); + cb_link_labels(cb); + + + + + cb_write_post_call_bytes(cb); return true; diff --git a/ujit_core.c b/ujit_core.c index d7b0d65e69..105769955c 100644 --- a/ujit_core.c +++ b/ujit_core.c @@ -32,7 +32,7 @@ Get an operand for the adjusted stack pointer address https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L32 x86opnd_t ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes) { - int32_t offset = (ctx->stack_size) * sizeof(VALUE) + offset_bytes; + int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes; return mem_opnd(64, REG_SP, offset); } @@ -49,9 +49,10 @@ ctx_stack_push(ctx_t* ctx, int type) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L49 ctx->temp_types[ctx->stack_size] = type; ctx->stack_size += 1; + ctx->sp_offset += 1; // SP points just above the topmost value - int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE); + int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE); return mem_opnd(64, REG_SP, offset); } @@ -65,7 +66,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L66 RUBY_ASSERT(n <= ctx->stack_size); // SP points just above the topmost value - int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE); + int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE); x86opnd_t top = mem_opnd(64, REG_SP, offset); // Clear the types of the popped values @@ -77,6 +78,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L78 } ctx->stack_size -= n; + ctx->sp_offset -= n; return top; } @@ -88,7 +90,7 @@ x86opnd_t https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L90 
ctx_stack_opnd(ctx_t* ctx, int32_t idx) { // SP points just above the topmost value - int32_t offset = (ctx->stack_size - 1 - idx) * sizeof(VALUE); + int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE); x86opnd_t opnd = mem_opnd(64, REG_SP, offset); return opnd; @@ -120,6 +122,9 @@ int ctx_diff(const ctx_t* src, const ctx_t* dst) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L122 if (dst->stack_size != src->stack_size) return INT_MAX; + if (dst->sp_offset != src->sp_offset) + return INT_MAX; + if (dst->self_is_object != src->self_is_object) return INT_MAX; @@ -345,6 +350,7 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L350 // Limit the number of block versions ctx_t generic_ctx = DEFAULT_CTX; generic_ctx.stack_size = target_ctx->stack_size; + generic_ctx.sp_offset = target_ctx->sp_offset; if (count_block_versions(target) >= MAX_VERSIONS - 1) { fprintf(stderr, "version limit hit in branch_stub_hit\n"); @@ -383,7 +389,6 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx) https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L389 } // Get a version or stub corresponding to a branch target -// TODO: need incoming and target contexts uint8_t* get_branch_target( blockid_t target, const ctx_t* ctx, @@ -440,13 +445,13 @@ void gen_branch( https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L445 ) { RUBY_ASSERT(target0.iseq != NULL); - RUBY_ASSERT(target1.iseq != NULL); + //RUBY_ASSERT(target1.iseq != NULL); RUBY_ASSERT(num_branches < MAX_BRANCHES); uint32_t branch_idx = num_branches++; // Get the branch targets or stubs uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0); - uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1); + uint8_t* dst_addr1 = ctx1? 
get_branch_target(target1, ctx1, branch_idx, 1):NULL; // Call the branch generation function uint32_t start_pos = cb->write_pos; @@ -459,7 +464,7 @@ void gen_branch( https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L464 end_pos, *src_ctx, { target0, target1 }, - { *ctx0, *ctx1 }, + { *ctx0, ctx1? *ctx1:DEFAULT_CTX }, { dst_addr0, dst_addr1 }, gen_fn, SHAPE_DEFAULT @@ -508,6 +513,7 @@ void gen_direct_jump( https://github.com/ruby/ruby/blob/trunk/ujit_core.c#L513 // Limit the number of block versions ctx_t generic_ctx = DEFAULT_CTX; generic_ctx.stack_size = ctx->stack_size; + generic_ctx.sp_offset = ctx->sp_offset; if (count_block_versions(target0) >= MAX_VERSIONS - 1) { fprintf(stderr, "version limit hit in branch_stub_hit\n"); diff --git a/ujit_core.h b/ujit_core.h index 08fdd4d779..9430269438 100644 --- a/ujit_core.h +++ b/ujit_core.h @@ -31,9 +31,12 @@ typedef struct CtxStruct https://github.com/ruby/ruby/blob/trunk/ujit_core.h#L31 // T_NONE==0 is the unknown type uint8_t temp_types[MAX_TEMP_TYPES]; - // Number of values pushed on the temporary stack + // Number of values currently on the temporary stack uint16_t stack_size; + // Offset of the JIT SP relative to the interpreter SP + int16_t sp_offset; + // Whether we know self is a heap object bool self_is_object : 1; diff --git a/vm.c b/vm.c index e98f89864b..86a78bc8d5 100644 --- a/vm.c +++ b/vm.c @@ -202,7 +202,7 @@ VM_CAPTURED_BLOCK_TO_CFP(co (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/