ruby-changes:68562
From: Maxime <ko1@a...>
Date: Thu, 21 Oct 2021 08:08:40 +0900 (JST)
Subject: [ruby-changes:68562] a3b411b9c8 (master): add and mov instruction encoding ported and tested
https://git.ruby-lang.org/ruby.git/commit/?id=a3b411b9c8 From a3b411b9c82010d480ce3bf203dcc8b8600722ce Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@s...> Date: Wed, 9 Sep 2020 16:45:28 -0400 Subject: add and mov instruction encoding ported and tested --- test_asm.sh | 2 +- ujit_asm.c | 472 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- ujit_asm.h | 18 ++- ujit_asm_tests.c | 119 +++++++++++++- 4 files changed, 601 insertions(+), 10 deletions(-) diff --git a/test_asm.sh b/test_asm.sh index 6a48bf5112..1726a6a4e5 100755 --- a/test_asm.sh +++ b/test_asm.sh @@ -3,7 +3,7 @@ https://github.com/ruby/ruby/blob/trunk/test_asm.sh#L3 clear -clang -std=gnu99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test +clang -std=gnu99 -Wall -Werror -Wshorten-64-to-32 ujit_asm.c ujit_asm_tests.c -o asm_test ./asm_test diff --git a/ujit_asm.c b/ujit_asm.c index 5afea1a0c2..41167e9d42 100644 --- a/ujit_asm.c +++ b/ujit_asm.c @@ -11,6 +11,9 @@ https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L11 // TODO: give ujit_examples.h some more meaningful file name #include "ujit_examples.h" +// Dummy none/null operand +const x86opnd_t NO_OPND = { OPND_NONE, 0, .imm = 0 }; + // 64-bit GP registers const x86opnd_t RAX = { OPND_REG, 64, .reg = { REG_GP, 0 }}; const x86opnd_t RCX = { OPND_REG, 64, .reg = { REG_GP, 1 }}; @@ -29,6 +32,56 @@ const x86opnd_t R13 = { OPND_REG, 64, .reg = { REG_GP, 13 }}; https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L32 const x86opnd_t R14 = { OPND_REG, 64, .reg = { REG_GP, 14 }}; const x86opnd_t R15 = { OPND_REG, 64, .reg = { REG_GP, 15 }}; +// Compute the number of bits needed to encode a signed value +size_t sig_imm_size(int64_t imm) +{ + // Compute the smallest size this immediate fits in + if (imm >= -128 && imm <= 127) + return 8; + if (imm >= -32768 && imm <= 32767) + return 16; + if (imm >= -2147483648 && imm <= 2147483647) + return 32; + + return 64; +} + +// Compute the number of bits needed to encode an unsigned value +size_t unsig_imm_size(uint64_t imm) +{ + // Compute the smallest size this immediate fits in + if (imm <= 255) + return 8; + else if (imm <= 65535) + return 16; + else if (imm <= 4294967295) + return 32; + + return 64; +} + +x86opnd_t mem_opnd(size_t num_bits, x86opnd_t base_reg, int32_t disp) +{ + x86opnd_t opnd = { + OPND_MEM, + num_bits, + .mem = { base_reg.reg.reg_no, 0, 0, false, false, disp } + }; + + return opnd; +} + +x86opnd_t imm_opnd(int64_t imm) +{ + x86opnd_t opnd = { + OPND_IMM, + sig_imm_size(imm), + .imm = imm + }; + + return opnd; +} + void cb_init(codeblock_t* cb, size_t mem_size) { // Map the memory as executable @@ -155,9 +208,14 @@ void cb_write_epilogue(codeblock_t* cb) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L208 cb_write_byte(cb, ujit_post_call_bytes[i]); } -// Check if an operand needs a rex byte to be encoded +// Check if an operand needs a REX byte to be encoded bool rex_needed(x86opnd_t opnd) { + if (opnd.type == OPND_NONE || opnd.type == OPND_IMM) + { + return false; + } + if (opnd.type == OPND_REG) { return ( @@ -174,6 +232,53 @@ bool rex_needed(x86opnd_t opnd) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L232 assert (false); } +// Check if an SIB byte is needed to encode this operand +bool sib_needed(x86opnd_t opnd) +{ + if (opnd.type != OPND_MEM) + return false; + + return ( + opnd.mem.has_idx || + opnd.mem.base_reg_no == RSP.reg.reg_no || + opnd.mem.base_reg_no == R12.reg.reg_no + ); +} + +// Compute the size of the displacement field needed for a memory operand +size_t disp_size(x86opnd_t opnd) +{ + assert (opnd.type == OPND_MEM); + + // If using RIP as the base, use disp32 + if (opnd.mem.is_iprel) + { + return 32; + } + + // Compute the required displacement size + if (opnd.mem.disp != 0) + { + size_t num_bits = sig_imm_size(opnd.mem.disp); + assert (num_bits <= 32 && "displacement does not fit in 32 bits"); + + // x86 can only encode 8-bit and 32-bit displacements + if (num_bits == 16) + num_bits = 32;; + + return num_bits; + } + + // If EBP or RBP or R13 is used as the base, displacement must be encoded + if (opnd.mem.base_reg_no == RBP.reg.reg_no || + opnd.mem.base_reg_no == R13.reg.reg_no) + { + return 8; + } + + return 0; +} + // Write the REX byte static void cb_write_rex( codeblock_t* cb, @@ -206,6 +311,371 @@ static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, x86opnd_t reg) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L311 cb_write_byte(cb, op_byte); } +// Encode an RM instruction +void cb_write_rm( + codeblock_t* cb, + bool szPref, + bool rexW, + x86opnd_t r_opnd, + x86opnd_t rm_opnd, + uint8_t opExt, + size_t op_len, + ...) +{ + assert (op_len > 0 && op_len <= 3); + assert (r_opnd.type == OPND_REG || r_opnd.type == OPND_NONE); + + // Flag to indicate the REX prefix is needed + bool need_rex = rexW || rex_needed(r_opnd) || rex_needed(rm_opnd); + + // Flag to indicate SIB byte is needed + bool need_sib = sib_needed(r_opnd) || sib_needed(rm_opnd); + + // Add the operand-size prefix, if needed + if (szPref == true) + cb_write_byte(cb, 0x66); + + // Add the REX prefix, if needed + if (need_rex) + { + // 0 1 0 0 w r x b + // w - 64-bit operand size flag + // r - MODRM.reg extension + // x - SIB.index extension + // b - MODRM.rm or SIB.base extension + + uint8_t w = rexW? 1:0; + + uint8_t r; + if (r_opnd.type != OPND_NONE) + r = (r_opnd.reg.reg_no & 8)? 1:0; + else + r = 0; + + uint8_t x; + if (need_sib && rm_opnd.mem.has_idx) + x = (rm_opnd.mem.idx_reg_no & 8)? 1:0; + else + x = 0; + + uint8_t b; + if (rm_opnd.type == OPND_REG) + b = (rm_opnd.reg.reg_no & 8)? 1:0; + else if (rm_opnd.type == OPND_MEM) + b = (rm_opnd.mem.base_reg_no & 8)? 1:0; + else + b = 0; + + // Encode and write the REX byte + uint8_t rex_byte = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b); + cb_write_byte(cb, rex_byte); + } + + // Write the opcode bytes to the code block + va_list va; + va_start(va, op_len); + for (size_t i = 0; i < op_len; ++i) + { + uint8_t byte = va_arg(va, int); + cb_write_byte(cb, byte); + } + va_end(va); + + // MODRM.mod (2 bits) + // MODRM.reg (3 bits) + // MODRM.rm (3 bits) + + assert ( + !(opExt != 0xFF && r_opnd.type != OPND_NONE) && + "opcode extension and register operand present" + ); + + // Encode the mod field + uint8_t mod; + if (rm_opnd.type == OPND_REG) + { + mod = 3; + } + else + { + size_t dsize = disp_size(rm_opnd); + if (dsize == 0 || rm_opnd.mem.is_iprel) + mod = 0; + else if (dsize == 8) + mod = 1; + else if (dsize == 32) + mod = 2; + else + assert (false); + } + + // Encode the reg field + uint8_t reg; + if (opExt != 0xFF) + reg = opExt; + else if (r_opnd.type == OPND_REG) + reg = r_opnd.reg.reg_no & 7; + else + reg = 0; + + // Encode the rm field + uint8_t rm; + if (rm_opnd.type == OPND_REG) + { + rm = rm_opnd.reg.reg_no & 7; + } + else + { + if (need_sib) + rm = 4; + else + rm = rm_opnd.mem.base_reg_no & 7; + } + + // Encode and write the ModR/M byte + uint8_t rm_byte = (mod << 6) + (reg << 3) + (rm); + cb_write_byte(cb, rm_byte); + + // Add the SIB byte, if needed + if (need_sib) + { + // SIB.scale (2 bits) + // SIB.index (3 bits) + // SIB.base (3 bits) + + assert (rm_opnd.type == OPND_MEM); + + // Encode the scale value + uint8_t scale = rm_opnd.mem.scale_exp; + + // Encode the index value + uint8_t index; + if (!rm_opnd.mem.has_idx) + index = 4; + else + index = rm_opnd.mem.idx_reg_no & 7; + + // Encode the base register + uint8_t base = rm_opnd.mem.base_reg_no & 7; + + // Encode and write the SIB byte + uint8_t sib_byte = (scale << 6) + (index << 3) + (base); + cb_write_byte(cb, sib_byte); + } + + // Add the displacement size + if (rm_opnd.type == OPND_MEM && rm_opnd.mem.disp != 0) + { + size_t dsize = disp_size(rm_opnd); + cb_write_int(cb, rm_opnd.mem.disp, dsize); + } +} + +// Encode an add-like RM instruction with multiple possible encodings +void cb_write_rm_multi( + codeblock_t* cb, + const char* mnem, + uint8_t opMemReg8, + uint8_t opMemRegPref, + uint8_t opRegMem8, + uint8_t opRegMemPref, + uint8_t opMemImm8, + uint8_t opMemImmSml, + uint8_t opMemImmLrg, + uint8_t opExtImm, + x86opnd_t opnd0, + x86opnd_t opnd1) +{ + assert (opnd0.type == OPND_REG || opnd0.type == OPND_MEM); + + /* + // Write disassembly string + if (!opnd1.isNone) + cb.writeASM(mnem, opnd0, opnd1); + else + cb.writeASM(mnem, opnd0); + */ + + // Check the size of opnd0 + size_t opndSize = opnd0.num_bits; + + // Check the size of opnd1 + if (opnd1.type == OPND_REG || opnd1.type == OPND_MEM) + { + assert (opnd1.num_bits == opndSize && "operand size mismatch"); + } + else if (opnd1.type == OPND_IMM) + { + assert (opnd1.num_bits <= opndSize); + } + + assert (opndSize == 8 || opndSize == 16 || opndSize == 32 || opndSize == 64); + bool szPref = opndSize == 16; + bool rexW = opndSize == 64; + + // R/M + Reg + (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/