[前][次][番号順一覧][スレッド一覧]

ruby-changes:73557

From: Jimmy <ko1@a...>
Date: Wed, 14 Sep 2022 23:32:34 +0900 (JST)
Subject: [ruby-changes:73557] 758a1d7302 (master): Initial support for VM_CALL_ARGS_SPLAT (#6341)

https://git.ruby-lang.org/ruby.git/commit/?id=758a1d7302

From 758a1d730230ad0f4adfd7681c1fe4c8ac398bde Mon Sep 17 00:00:00 2001
From: Jimmy Miller <jimmy.miller@s...>
Date: Wed, 14 Sep 2022 10:32:22 -0400
Subject: Initial support for VM_CALL_ARGS_SPLAT (#6341)

* Initial support for VM_CALL_ARGS_SPLAT

This implements support for calls with splat (*) for some methods. In
benchmarks this made very little difference for most benchmarks, but a
large difference for binarytrees. Looking at side exits, many
benchmarks now don't exit for splat, but exit for some other
reason. Binarytrees however had a number of calls that used splat args
that are now much faster. In my non-scientific benchmarking this made
splat args performance on par with not using splat args at all.

* Fix wording and whitespace

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@g...>

* Get rid of side_effect reassignment

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@g...>
---
 yjit.c                         |   6 ++
 yjit/bindgen/src/main.rs       |   1 +
 yjit/src/codegen.rs            | 200 +++++++++++++++++++++++++++++++++--------
 yjit/src/cruby.rs              |   2 +
 yjit/src/cruby_bindings.inc.rs |   3 +
 yjit/src/stats.rs              |   7 +-
 6 files changed, 182 insertions(+), 37 deletions(-)

diff --git a/yjit.c b/yjit.c
index a834191070..facddcca0a 100644
--- a/yjit.c
+++ b/yjit.c
@@ -603,6 +603,12 @@ rb_get_iseq_flags_has_rest(const rb_iseq_t *iseq) https://github.com/ruby/ruby/blob/trunk/yjit.c#L603
     return iseq->body->param.flags.has_rest;
 }
 
+bool
+rb_get_iseq_flags_ruby2_keywords(const rb_iseq_t *iseq)
+{
+    return iseq->body->param.flags.ruby2_keywords;
+}
+
 bool
 rb_get_iseq_flags_has_block(const rb_iseq_t *iseq)
 {
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index eaf030a1de..294da21378 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -342,6 +342,7 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L342
         .allowlist_function("rb_get_iseq_flags_has_kwrest")
         .allowlist_function("rb_get_iseq_flags_has_block")
         .allowlist_function("rb_get_iseq_flags_has_accepts_no_kwarg")
+        .allowlist_function("rb_get_iseq_flags_ruby2_keywords")
         .allowlist_function("rb_get_iseq_body_local_table_size")
         .allowlist_function("rb_get_iseq_body_param_keyword")
         .allowlist_function("rb_get_iseq_body_param_size")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index ca9ec655d1..f13505365e 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -482,7 +482,7 @@ fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) - https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L482
 //
 // No guards change the logic for reconstructing interpreter state at the
 // moment, so there is one unique side exit for each context. Note that
-// it's incorrect to jump to the side exit after any ctx stack push/pop operations
+// it's incorrect to jump to the side exit after any ctx stack push operations
 // since they change the logic required for reconstructing interpreter state.
 fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr {
     match jit.side_exit_for_pc {
@@ -1427,7 +1427,7 @@ fn gen_expandarray( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L1427
         return KeepCompiling;
     }
 
-    // Move the array from the stack into REG0 and check that it's an array.
+    // Move the array from the stack and check that it's an array.
     let array_reg = asm.load(array_opnd);
     guard_object_is_heap(
         asm,
@@ -3911,6 +3911,12 @@ fn gen_send_cfunc( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L3911
 ) -> CodegenStatus {
     let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
     let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+    let mut argc = argc;
+
+    // Create a side-exit to fall back to the interpreter
+    let side_exit = get_side_exit(jit, ocb, ctx);
+
+    let flags = unsafe { vm_ci_flag(ci) };
 
     // If the function expects a Ruby array of arguments
     if cfunc_argc < 0 && cfunc_argc != -1 {
@@ -3925,25 +3931,6 @@ fn gen_send_cfunc( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L3931
         unsafe { get_cikw_keyword_len(kw_arg) }
     };
 
-    // Number of args which will be passed through to the callee
-    // This is adjusted by the kwargs being combined into a hash.
-    let passed_argc = if kw_arg.is_null() {
-        argc
-    } else {
-        argc - kw_arg_num + 1
-    };
-
-    // If the argument count doesn't match
-    if cfunc_argc >= 0 && cfunc_argc != passed_argc {
-        gen_counter_incr!(asm, send_cfunc_argc_mismatch);
-        return CantCompile;
-    }
-
-    // Don't JIT functions that need C stack arguments for now
-    if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) {
-        gen_counter_incr!(asm, send_cfunc_toomany_args);
-        return CantCompile;
-    }
 
     if c_method_tracing_currently_enabled(jit) {
         // Don't JIT if tracing c_call or c_return
@@ -3964,9 +3951,6 @@ fn gen_send_cfunc( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L3951
         }
     }
 
-    // Create a side-exit to fall back to the interpreter
-    let side_exit = get_side_exit(jit, ocb, ctx);
-
     // Check for interrupts
     gen_check_ints(asm, side_exit);
 
@@ -3978,6 +3962,38 @@ fn gen_send_cfunc( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L3962
     asm.cmp(CFP, stack_limit);
     asm.jbe(counted_exit!(ocb, side_exit, send_se_cf_overflow).into());
 
+    if flags & VM_CALL_ARGS_SPLAT != 0 {
+        if cfunc_argc > 0 {
+            // push_splat_args does a ctx.stack_push so we can no longer side exit
+            argc = push_splat_args(argc, ctx, asm, ocb, side_exit, cfunc_argc as u32);
+        } else {
+            // This is a variadic c function and we'd need to pop
+            // each of the elements off the array, but the array may be dynamically sized
+            gen_counter_incr!(asm, send_args_splat_variadic);
+            return CantCompile
+        }
+    }
+
+    // Number of args which will be passed through to the callee
+    // This is adjusted by the kwargs being combined into a hash.
+    let passed_argc = if kw_arg.is_null() {
+        argc
+    } else {
+        argc - kw_arg_num + 1
+    };
+
+    // If the argument count doesn't match
+    if cfunc_argc >= 0 && cfunc_argc != passed_argc {
+        gen_counter_incr!(asm, send_cfunc_argc_mismatch);
+        return CantCompile;
+    }
+
+    // Don't JIT functions that need C stack arguments for now
+    if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) {
+        gen_counter_incr!(asm, send_cfunc_toomany_args);
+        return CantCompile;
+    }
+
     // Points to the receiver operand on the stack
     let recv = ctx.stack_opnd(argc);
 
@@ -4152,6 +4168,85 @@ fn gen_return_branch( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L4168
     }
 }
 
+
+/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args)
+/// It optimistically compiles to a static size that is the exact number of arguments
+/// needed for the function.
+fn push_splat_args(argc: i32, ctx: &mut Context, asm: &mut Assembler, ocb: &mut OutlinedCb, side_exit: CodePtr, num_params: u32) -> i32 {
+
+    let mut argc = argc;
+
+    asm.comment("push_splat_args");
+
+    let array_opnd = ctx.stack_opnd(0);
+
+    argc = argc - 1;
+
+    let array_reg = asm.load(array_opnd);
+    guard_object_is_heap(
+        asm,
+        array_reg,
+        counted_exit!(ocb, side_exit, send_splat_not_array),
+    );
+    guard_object_is_array(
+        asm,
+        array_reg,
+        counted_exit!(ocb, side_exit, send_splat_not_array),
+    );
+
+    // Pull out the embed flag to check if it's an embedded array.
+    let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+
+    // Get the length of the array
+    let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into());
+    let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into());
+
+    // Conditionally move the length of the heap array
+    let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+    asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into());
+    let array_len_opnd = Opnd::mem(
+        (8 * size_of::<std::os::raw::c_long>()) as u8,
+        asm.load(array_opnd),
+        RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
+    );
+    let array_len_opnd = asm.csel_nz(emb_len_opnd, array_len_opnd);
+
+    let required_args = num_params - argc as u32;
+
+    // Only handle the case where the number of values in the array is equal to the number requested
+    asm.cmp(array_len_opnd, required_args.into());
+    asm.jne(counted_exit!(ocb, side_exit, send_splatarray_rhs_too_small).into());
+
+    let array_opnd = ctx.stack_pop(1);
+
+    if required_args > 0 {
+
+        // Load the address of the embedded array
+        // (struct RArray *)(obj)->as.ary
+        let array_reg = asm.load(array_opnd);
+        let ary_opnd = asm.lea(Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RARRAY_AS_ARY));
+
+        // Conditionally load the address of the heap array
+        // (struct RArray *)(obj)->as.heap.ptr
+        let flags_opnd = Opnd::mem((8 * SIZEOF_VALUE) as u8, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+        asm.test(flags_opnd, Opnd::UImm(RARRAY_EMBED_FLAG as u64));
+        let heap_ptr_opnd = Opnd::mem(
+            (8 * size_of::<usize>()) as u8,
+            asm.load(array_opnd),
+            RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
+        );
+
+        let ary_opnd = asm.csel_nz(ary_opnd, heap_ptr_opnd);
+
+        for i in (0..required_args as i32) {
+            let top = ctx.stack_push(Type::Unknown);
+            asm.mov(top, Opnd::mem(64, ary_opnd, i * (SIZEOF_VALUE as  (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]