ruby-changes:73322
From: Kevin <ko1@a...>
Date: Tue, 30 Aug 2022 01:10:23 +0900 (JST)
Subject: [ruby-changes:73322] 54c7bc67a2 (master): Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433)
https://git.ruby-lang.org/ruby.git/commit/?id=54c7bc67a2 From 54c7bc67a2d54311b77aca9233b23a9e7a1ca581 Mon Sep 17 00:00:00 2001 From: Kevin Newton <kddnewton@g...> Date: Tue, 23 Aug 2022 13:41:22 -0400 Subject: Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433) * When we're storing an immediate 0 value at a memory address, we can use STUR XZR, Xd instead of loading 0 into a register and then storing that register. * When we're moving 0 into an argument register, we can use MOV Xd, XZR instead of loading the value into a register first. * In the newarray instruction, we can skip looking at the stack at all if the number of values we're using is 0. --- yjit/src/asm/arm64/mod.rs | 3 +++ yjit/src/asm/arm64/opnd.rs | 5 ++++- yjit/src/backend/arm64/mod.rs | 14 ++++++++++++-- yjit/src/codegen.rs | 11 ++++++++--- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index e5ba2f81ea..fb07498ce2 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -542,6 +542,9 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L542 LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() }, + (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => { + LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into() + }, (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() }, diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index a10e289455..c89481fb03 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -111,6 +111,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L111 pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; +// zero register +pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); @@ -143,7 +146,7 @@ pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L146 pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); -pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); +pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG); // 32-bit registers pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 3f1bbf99b0..08eb5efa3f 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -289,7 +289,14 @@ impl Assembler https://github.com/ruby/ruby/blob/trunk/yjit/src/backend/arm64/mod.rs#L289 // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { - let value = split_load_operand(asm, opnd); + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. + let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + _ => split_load_operand(asm, opnd) + }; + asm.mov(C_ARG_OPNDS[idx], value); } @@ -386,7 +393,10 @@ impl Assembler https://github.com/ruby/ruby/blob/trunk/yjit/src/backend/arm64/mod.rs#L393 }; }, Insn::Mov { dest, src } => { - let value = match (dest, src) { + let value: Opnd = match (dest, src) { + // If the first operand is zero, then we can just use + // the zero register. + (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG), // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e23171d2a0..1336fe3c57 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1168,9 +1168,14 @@ fn gen_newarray( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L1168 // Save the PC and SP because we are allocating jit_prepare_routine_call(jit, ctx, asm); - let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); - let values_ptr = asm.lea(values_opnd); + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + let offset_magnitude = SIZEOF_VALUE as u32 * n; + let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + asm.lea(values_opnd) + }; // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); let new_ary = asm.ccall( -- cgit v1.2.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/