[前][次][番号順一覧][スレッド一覧]

ruby-changes:73322

From: Kevin <ko1@a...>
Date: Tue, 30 Aug 2022 01:10:23 +0900 (JST)
Subject: [ruby-changes:73322] 54c7bc67a2 (master): Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433)

https://git.ruby-lang.org/ruby.git/commit/?id=54c7bc67a2

From 54c7bc67a2d54311b77aca9233b23a9e7a1ca581 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@g...>
Date: Tue, 23 Aug 2022 13:41:22 -0400
Subject: Various AArch64 optimizations
 (https://github.com/Shopify/ruby/pull/433)

* When we're storing an immediate 0 value at a memory address, we
  can use STUR XZR, [Xn] instead of loading 0 into a register and
  then storing that register.
* When we're moving 0 into an argument register, we can use
  MOV Xd, XZR instead of loading the value into a register first.
* In the newarray instruction, we can skip looking at the stack at
  all if the number of values we're using is 0.
---
 yjit/src/asm/arm64/mod.rs     |  3 +++
 yjit/src/asm/arm64/opnd.rs    |  5 ++++-
 yjit/src/backend/arm64/mod.rs | 14 ++++++++++++--
 yjit/src/codegen.rs           | 11 ++++++++---
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index e5ba2f81ea..fb07498ce2 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -542,6 +542,9 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L542
 
             LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into()
         },
+        (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => {
+            LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
+        },
         (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
             LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
         },
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index a10e289455..c89481fb03 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -111,6 +111,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L111
 pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
 pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
 
+// zero register
+pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 };
+
 // 64-bit registers
 pub const X0: A64Opnd = A64Opnd::Reg(X0_REG);
 pub const X1: A64Opnd = A64Opnd::Reg(X1_REG);
@@ -143,7 +146,7 @@ pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L146
 pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 });
 pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 });
 pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 });
-pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 });
+pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG);
 
 // 32-bit registers
 pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 });
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 3f1bbf99b0..08eb5efa3f 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -289,7 +289,14 @@ impl Assembler https://github.com/ruby/ruby/blob/trunk/yjit/src/backend/arm64/mod.rs#L289
                     // Note: the iteration order is reversed to avoid corrupting x0,
                     // which is both the return value and first argument register
                     for (idx, opnd) in opnds.into_iter().enumerate().rev() {
-                        let value = split_load_operand(asm, opnd);
+                        // If the value that we're sending is 0, then we can use
+                        // the zero register, so in this case we'll just send
+                        // a UImm of 0 along as the argument to the move.
+                        let value = match opnd {
+                            Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
+                            _ => split_load_operand(asm, opnd)
+                        };
+
                         asm.mov(C_ARG_OPNDS[idx], value);
                     }
 
@@ -386,7 +393,10 @@ impl Assembler https://github.com/ruby/ruby/blob/trunk/yjit/src/backend/arm64/mod.rs#L393
                     };
                 },
                 Insn::Mov { dest, src } => {
-                    let value = match (dest, src) {
+                    let value: Opnd = match (dest, src) {
+                        // If we're storing an immediate zero to memory, then we
+                        // can just use the zero register as the source.
+                        (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG),
                         // If the first operand is a memory operand, we're going
                         // to transform this into a store instruction, so we'll
                         // need to load this anyway.
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index e23171d2a0..1336fe3c57 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -1168,9 +1168,14 @@ fn gen_newarray( https://github.com/ruby/ruby/blob/trunk/yjit/src/codegen.rs#L1168
     // Save the PC and SP because we are allocating
     jit_prepare_routine_call(jit, ctx, asm);
 
-    let offset_magnitude = SIZEOF_VALUE as u32 * n;
-    let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize));
-    let values_ptr = asm.lea(values_opnd);
+    // If n is 0, then elts is never going to be read, so we can just pass null
+    let values_ptr = if n == 0 {
+        Opnd::UImm(0)
+    } else {
+        let offset_magnitude = SIZEOF_VALUE as u32 * n;
+        let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize));
+        asm.lea(values_opnd)
+    };
 
     // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
     let new_ary = asm.ccall(
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]