ruby-changes:73751

https://git.ruby-lang.org/ruby.git/commit/?id=28433e9aa0

From 28433e9aa0c765c9d20bc6397439a1b12e66bcbd Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@g...>
Date: Tue, 27 Sep 2022 16:58:01 -0400
Subject: Change IncrCounter lowering on AArch64 (#6455)

* Change IncrCounter lowering on AArch64

Previously we were using LDADDAL which is not available on
Graviton 1 chips. Instead, we're going to use an exclusive
load/store group through the LDAXR/STLXR instructions.

* Update yjit/src/backend/arm64/mod.rs

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@g...>
---
 yjit/src/asm/arm64/inst/load_store_exclusive.rs | 109 ++++++++++++++++++++++++
 yjit/src/asm/arm64/inst/mod.rs                  |   2 +
 yjit/src/asm/arm64/mod.rs                       |  39 +++++++++
 yjit/src/asm/arm64/opnd.rs                      |  10 +++
 yjit/src/backend/arm64/mod.rs                   |  32 ++++---
 5 files changed, 181 insertions(+), 11 deletions(-)
 create mode 100644 yjit/src/asm/arm64/inst/load_store_exclusive.rs

diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
new file mode 100644
index 0000000000..8216c2200a
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
@@ -0,0 +1,109 @@ https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/inst/load_store_exclusive.rs#L1
+/// The operation being performed for this instruction.
+enum Op {
+    Store = 0,
+    Load = 1
+}
+
+/// The size of the registers being operated on.
+enum Size {
+    Size32 = 0b10,
+    Size64 = 0b11
+}
+
+/// A convenience conversion so that we can turn the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+    fn from(num_bits: u8) -> Self {
+        match num_bits {
+            64 => Size::Size64,
+            32 => Size::Size32,
+            _ => panic!("Invalid number of bits: {}", num_bits)
+        }
+    }
+}
+
+/// The struct that represents an A64 load or store exclusive instruction that
+/// can be encoded.
+///
+/// LDAXR/STLXR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |  1     0  0    1  0  0  0    0     0                     1  1  1  1    1  1                                   |
+/// | size.                          op    rs..............                       rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadStoreExclusive {
+    /// The number of the register to be loaded or stored.
+    rt: u8,
+
+    /// The base register with which to form the address.
+    rn: u8,
+
+    /// The register to be used for the status result if it applies to this
+    /// operation. Otherwise it's the zero register.
+    rs: u8,
+
+    /// The operation being performed for this instruction.
+    op: Op,
+
+    /// The size of the registers being operated on.
+    size: Size
+}
+
+impl LoadStoreExclusive {
+    /// LDAXR
+    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
+    pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
+        Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() }
+    }
+
+    /// STLXR
+    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
+    pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
+        Self { rt, rn, rs, op: Op::Store, size: num_bits.into() }
+    }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadStoreExclusive> for u32 {
+    /// Convert an instruction into a 32-bit value.
+    fn from(inst: LoadStoreExclusive) -> Self {
+        0
+        | ((inst.size as u32) << 30)
+        | (FAMILY << 25)
+        | ((inst.op as u32) << 22)
+        | ((inst.rs as u32) << 16)
+        | (0b111111 << 10)
+        | ((inst.rn as u32) << 5)
+        | (inst.rt as u32)
+    }
+}
+
+impl From<LoadStoreExclusive> for [u8; 4] {
+    /// Convert an instruction into a 4 byte array.
+    fn from(inst: LoadStoreExclusive) -> [u8; 4] {
+        let result: u32 = inst.into();
+        result.to_le_bytes()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ldaxr() {
+        let inst = LoadStoreExclusive::ldaxr(16, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0xc85ffc10, result);
+    }
+
+    #[test]
+    fn test_stlxr() {
+        let inst = LoadStoreExclusive::stlxr(17, 16, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0xc811fc10, result);
+    }
+}
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index b3a77e73c9..9821e6a334 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -13,6 +13,7 @@ mod halfword_imm; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/inst/mod.rs#L13
 mod load_literal;
 mod load_register;
 mod load_store;
+mod load_store_exclusive;
 mod logical_imm;
 mod logical_reg;
 mod mov;
@@ -36,6 +37,7 @@ pub use halfword_imm::HalfwordImm; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/inst/mod.rs#L37
 pub use load_literal::LoadLiteral;
 pub use load_register::LoadRegister;
 pub use load_store::LoadStore;
+pub use load_store_exclusive::LoadStoreExclusive;
 pub use logical_imm::LogicalImm;
 pub use logical_reg::LogicalReg;
 pub use mov::Mov;
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index d97452a045..88431ce30a 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -331,6 +331,20 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L331
     cb.write_bytes(&bytes);
 }
 
+/// LDAXR - load a value from memory with acquire semantics and exclusive access
+pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rt, rn) {
+        (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to ldaxr instruction."),
+    };
+
+    cb.write_bytes(&bytes);
+}
+
 /// LDP (signed offset) - load a pair of registers from memory
 pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
     let bytes: [u8; 4] = match (rt1, rt2, rn) {
@@ -707,6 +721,21 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L721
     cb.write_bytes(&bytes);
 }
 
+/// STLXR - store a value to memory with release semantics, if exclusive access is held
+pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rs, rt, rn) {
+        (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register.");
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rn.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to stlxr instruction.")
+    };
+
+    cb.write_bytes(&bytes);
+}
+
 /// STP (signed offset) - store a pair of registers to memory
 pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
     let bytes: [u8; 4] = match (rt1, rt2, rn) {
@@ -1183,6 +1212,11 @@ mod tests { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L1212
         check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
     }
 
+    #[test]
+    fn test_ldaxr() {
+        check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11));
+    }
+
     #[test]
     fn test_ldp() {
         check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
@@ -1333,6 +1367,11 @@ mod tests { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/mod.rs#L1367
         check_bytes("80025fd6", |cb| ret(cb, X20));
     }
 
+    #[test]
+    fn test_stlxr() {
+        check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12));
+    }
+
     #[test]
     fn test_stp() {
         check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index 52b2a84637..0dc614ab4e 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -84,6 +84,14 @@ impl A64Opnd { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L84
             _ => false
         }
     }
+
+    /// Unwrap a register from an operand.
+    pub fn unwrap_reg(&self) -> A64Reg {
+        match self {
+            A64Opnd::Reg(reg) => *reg,
+            _ => panic!("Expected register operand")
+        }
+    }
 }
 
 // argument registers
@@ -102,6 +110,8 @@ pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/arm64/opnd.rs#L110
 pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
 pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
 pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };
+pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 };
+pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 };
 
 // callee-save registers
 pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 0a5068be58..79dff530d1 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -70,7 +70,8 @@ impl Assembler https://github.com/ruby/ruby/blob/trunk/yjit/src/backend/arm64/mod.rs#L70
 {
     // A spec (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/