ruby-changes:69612

https://git.ruby-lang.org/ruby.git/commit/?id=91a9062626

From 91a9062626733c7d11ea4795bd1957a21f2adec6 Mon Sep 17 00:00:00 2001
From: Alan Wu <XrXr@u...>
Date: Fri, 5 Nov 2021 15:44:29 -0400
Subject: YJIT: use shorter encoding for mov(r64,imm) when unambiguous (#5081)

* YJIT: use shorter encoding for mov(r64,imm) when unambiguous

Previously, for small constants such as `mov(RAX, imm_opnd(Qundef))`,
we emit an instruction with an 8-byte immediate. This form commonly
gets the `movabs` mnemonic.

In 64-bit mode, 32-bit operands get zero extended to 64-bit to fill the
register, so when the immediate is small enough, we can save 4 bytes by
using the `mov` variant that takes a 32-bit immediate and does a zero
extension.

Not implemented in this change: there is also an imm32 variant of `mov`
that does sign extension, which we could use. For now, when the constant
is negative, we fall back to the `movabs` form.

In railsbench, this change yields roughly a 12% code size reduction for
the outlined block.

Co-authored-by: Jemma Issroff <jemmaissroff@g...>

* [ci skip] comment edit. Please squash.

Co-authored-by: Jemma Issroff <jemmaissroff@g...>
---
 misc/yjit_asm_tests.c | 12 +++++++++++-
 yjit_asm.c            | 34 ++++++++++++++++++++++++++++------
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/misc/yjit_asm_tests.c b/misc/yjit_asm_tests.c
index 5708d3abadb..5548af07f53 100644
--- a/misc/yjit_asm_tests.c
+++ b/misc/yjit_asm_tests.c
@@ -182,10 +182,20 @@ void run_assembler_tests(void) https://github.com/ruby/ruby/blob/trunk/misc/yjit_asm_tests.c#L182
     // mov
     cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(7)); check_bytes(cb, "B807000000");
     cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(-3)); check_bytes(cb, "B8FDFFFFFF");
-    cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "49BF0300000000000000");
+    cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "41BF03000000");
     cb_set_pos(cb, 0); mov(cb, EAX, EBX); check_bytes(cb, "89D8");
     cb_set_pos(cb, 0); mov(cb, EAX, ECX); check_bytes(cb, "89C8");
     cb_set_pos(cb, 0); mov(cb, EDX, mem_opnd(32, RBX, 128)); check_bytes(cb, "8B9380000000");
+
+    // Test `mov rax, 3` => `mov eax, 3` optimization
+    cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x34)); check_bytes(cb, "41B834000000");
+    cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x80000000)); check_bytes(cb, "49B80000008000000000");
+    cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(-1)); check_bytes(cb, "49B8FFFFFFFFFFFFFFFF");
+
+    cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x34)); check_bytes(cb, "B834000000");
+    cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x80000000)); check_bytes(cb, "48B80000008000000000");
+    cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-52)); check_bytes(cb, "48B8CCFFFFFFFFFFFFFF");
+    cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-1)); check_bytes(cb, "48B8FFFFFFFFFFFFFFFF");
     /*
     test(
         delegate void (CodeBlock cb) { cb.mov(X86Opnd(AL), X86Opnd(8, RCX, 0, 1, RDX)); },
diff --git a/yjit_asm.c b/yjit_asm.c
index 49844145cb3..2ae50295a9b 100644
--- a/yjit_asm.c
+++ b/yjit_asm.c
@@ -1259,6 +1259,15 @@ void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) https://github.com/ruby/ruby/blob/trunk/yjit_asm.c#L1259
     cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x8D);
 }
 
+// Does this number fit in 32 bits and stay the same if you zero extend it to 64 bits?
+// If the sign bit is clear, sign extension and zero extension yield the same
+// result.
+static bool
+zero_extendable_32bit(uint64_t number)
+{
+    return number <= UINT32_MAX && (number & (1ull << 31ull)) == 0;
+}
+
 /// mov - Data move operation
 void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
 {
@@ -1275,14 +1284,27 @@ void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) https://github.com/ruby/ruby/blob/trunk/yjit_asm.c#L1284
                 unsig_imm_size(src.as.imm) <= dst.num_bits
             );
 
-            if (dst.num_bits == 16)
-                cb_write_byte(cb, 0x66);
-            if (rex_needed(dst) || dst.num_bits == 64)
-                cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);
+            // In case the source immediate could be zero extended to be 64
+            // bit, we can use the 32-bit operands version of the instruction.
+            // For example, we can turn mov(rax, 0x34) into the equivalent
+            // mov(eax, 0x34).
+            if (dst.num_bits == 64 && zero_extendable_32bit(src.as.unsig_imm)) {
+                if (rex_needed(dst))
+                    cb_write_rex(cb, false, 0, 0, dst.as.reg.reg_no);
+                cb_write_opcode(cb, 0xB8, dst);
+                cb_write_int(cb, src.as.imm, 32);
+            }
+            else {
+                if (dst.num_bits == 16)
+                    cb_write_byte(cb, 0x66);
+
+                if (rex_needed(dst) || dst.num_bits == 64)
+                    cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);
 
-            cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);
+                cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);
 
-            cb_write_int(cb, src.as.imm, dst.num_bits);
+                cb_write_int(cb, src.as.imm, dst.num_bits);
+            }
         }
 
         // M + Imm
-- 
cgit v1.2.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/