[前][次][番号順一覧][スレッド一覧]

ruby-changes:69017

From: Maxime <ko1@a...>
Date: Thu, 21 Oct 2021 08:20:16 +0900 (JST)
Subject: [ruby-changes:69017] 860589c7fa (master): Use builtin_inline_p to avoid pushing a frame for primitive C methods (#63)

https://git.ruby-lang.org/ruby.git/commit/?id=860589c7fa

From 860589c7fa63faa837e0922f581ef8b1af7837d4 Mon Sep 17 00:00:00 2001
From: Maxime Chevalier-Boisvert <maximechevalierb@g...>
Date: Thu, 3 Jun 2021 08:39:52 -0400
Subject: Use builtin_inline_p to avoid pushing a frame for primitive C methods
 (#63)

* Use builtin_inline_p to skip a frame of C methods

* Fix bugs in primitive cfunc call code

* Remove if (push_frame) {}

* Remove if (push_frame) {}

* Push Aaron's fix to avoid hardcoding insn lengths

Co-authored-by: Takashi Kokubun <takashikkbn@g...>
---
 yjit_codegen.c | 209 +++++++++++++++++++++++++++++++++++----------------------
 yjit_iface.c   |  13 ----
 yjit_iface.h   |   1 -
 3 files changed, 128 insertions(+), 95 deletions(-)

diff --git a/yjit_codegen.c b/yjit_codegen.c
index d2b4e3ddbb..847bb57e0d 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -2253,23 +2253,18 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L2253
     //print_str(cb, "recv");
     //print_ptr(cb, recv);
 
-    // If this function needs a Ruby stack frame
-    const bool push_frame = cfunc_needs_frame(cfunc);
-
     // Create a size-exit to fall back to the interpreter
     uint8_t *side_exit = yjit_side_exit(jit, ctx);
 
     // Check for interrupts
     yjit_check_ints(cb, side_exit);
 
-    if (push_frame) {
-        // Stack overflow check
-        // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
-        // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
-        lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
-        cmp(cb, REG_CFP, REG0);
-        jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
-    }
+    // Stack overflow check
+    // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+    // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
+    cmp(cb, REG_CFP, REG0);
+    jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
 
     // Points to the receiver operand on the stack
     x86opnd_t recv = ctx_stack_opnd(ctx, argc);
@@ -2277,71 +2272,69 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L2272
     // Store incremented PC into current control frame in case callee raises.
     jit_save_pc(jit, REG0);
 
-    if (push_frame) {
-        if (block) {
-            // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
-            // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
-            // with cfp->block_code.
-            jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
-            mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
-        }
-
-        // Increment the stack pointer by 3 (in the callee)
-        // sp += 3
-        lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
-
-        // Write method entry at sp[-3]
-        // sp[-3] = me;
-        // Put compile time cme into REG1. It's assumed to be valid because we are notified when
-        // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
-        jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
-        mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
-
-        // Write block handler at sp[-2]
-        // sp[-2] = block_handler;
-        if (block) {
-            // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
-            lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
-            or(cb, REG1, imm_opnd(1));
-            mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
-        }
-        else {
-            mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
-        }
+    if (block) {
+        // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+        // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
+        // with cfp->block_code.
+        jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
+        mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
+    }
 
-        // Write env flags at sp[-1]
-        // sp[-1] = frame_type;
-        uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
-        mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
+    // Increment the stack pointer by 3 (in the callee)
+    // sp += 3
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
 
-        // Allocate a new CFP (ec->cfp--)
-        sub(
-            cb,
-            member_opnd(REG_EC, rb_execution_context_t, cfp),
-            imm_opnd(sizeof(rb_control_frame_t))
-        );
+    // Write method entry at sp[-3]
+    // sp[-3] = me;
+    // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+    // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
+    jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
+    mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
 
-        // Setup the new frame
-        // *cfp = (const struct rb_control_frame_struct) {
-        //    .pc         = 0,
-        //    .sp         = sp,
-        //    .iseq       = 0,
-        //    .self       = recv,
-        //    .ep         = sp - 1,
-        //    .block_code = 0,
-        //    .__bp__     = sp,
-        // };
-        mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
-        mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
-        sub(cb, REG0, imm_opnd(sizeof(VALUE)));
-        mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
-        mov(cb, REG0, recv);
-        mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
+    // Write block handler at sp[-2]
+    // sp[-2] = block_handler;
+    if (block) {
+        // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
+        lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
+        or(cb, REG1, imm_opnd(1));
+        mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
     }
+    else {
+        mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
+    }
+
+    // Write env flags at sp[-1]
+    // sp[-1] = frame_type;
+    uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
+    mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
+
+    // Allocate a new CFP (ec->cfp--)
+    sub(
+        cb,
+        member_opnd(REG_EC, rb_execution_context_t, cfp),
+        imm_opnd(sizeof(rb_control_frame_t))
+    );
+
+    // Setup the new frame
+    // *cfp = (const struct rb_control_frame_struct) {
+    //    .pc         = 0,
+    //    .sp         = sp,
+    //    .iseq       = 0,
+    //    .self       = recv,
+    //    .ep         = sp - 1,
+    //    .block_code = 0,
+    //    .__bp__     = sp,
+    // };
+    mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
+    mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
+    sub(cb, REG0, imm_opnd(sizeof(VALUE)));
+    mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
+    mov(cb, REG0, recv);
+    mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
 
     // Verify that we are calling the right function
     if (YJIT_CHECK_MODE > 0) {
@@ -2407,15 +2400,12 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L2400
     x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
     mov(cb, stack_ret, RAX);
 
-    // If this function needs a Ruby stack frame
-    if (push_frame) {
-        // Pop the stack frame (ec->cfp++)
-        add(
-            cb,
-            member_opnd(REG_EC, rb_execution_context_t, cfp),
-            imm_opnd(sizeof(rb_control_frame_t))
-        );
-    }
+    // Pop the stack frame (ec->cfp++)
+    add(
+        cb,
+        member_opnd(REG_EC, rb_execution_context_t, cfp),
+        imm_opnd(sizeof(rb_control_frame_t))
+    );
 
     // Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
     // after the call, while this does not. This difference prevents
@@ -2463,6 +2453,30 @@ iseq_lead_only_arg_setup_p(const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L2453
 bool rb_iseq_only_optparam_p(const rb_iseq_t *iseq);
 bool rb_iseq_only_kwparam_p(const rb_iseq_t *iseq);
 
+// True iff the iseq is exactly [opt_invokebuiltin_delegate_leave, leave] with builtin_inline_p set, i.e. it is leaf and replaceable by a single C call.
+static bool
+rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
+{
+    unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
+    unsigned int leave_len = insn_len(BIN(leave));
+
+    // Compare the size first so the && chain short-circuits before iseq_encoded[invokebuiltin_len] is read.
+    return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
+        rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
+        rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
+        iseq->body->builtin_inline_p
+    );
+}
+
+// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
+static const struct rb_builtin_function*
+rb_leaf_builtin_function(const rb_iseq_t *iseq)
+{
+    if (!rb_leaf_invokebuiltin_iseq_p(iseq))
+        return NULL;
+    return (const struct rb_builtin_ (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]