ruby-changes:68561
From: Maxime <ko1@a...>
Date: Thu, 21 Oct 2021 08:08:40 +0900 (JST)
Subject: [ruby-changes:68561] 8f40a62647 (master): Progress on x86 assembler. Encode a few simple instructions.
https://git.ruby-lang.org/ruby.git/commit/?id=8f40a62647 From 8f40a62647cd7b961faac6b810053b87502d3994 Mon Sep 17 00:00:00 2001 From: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@s...> Date: Tue, 8 Sep 2020 16:45:35 -0400 Subject: Progress on x86 assembler. Encode a few simple instructions. --- common.mk | 1 - test_asm.sh | 6 +++- ujit_asm.c | 84 +++++++++++++++++++++++++++++++++++++++---------- ujit_asm.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- ujit_asm_tests.c | 69 +++++++++++++++++++++++++++++++--------- 5 files changed, 223 insertions(+), 33 deletions(-) diff --git a/common.mk b/common.mk index 59d5dcea5d..1c62cc000c 100644 --- a/common.mk +++ b/common.mk @@ -151,7 +151,6 @@ COMMONOBJS = array.$(OBJEXT) \ https://github.com/ruby/ruby/blob/trunk/common.mk#L151 vm_sync.$(OBJEXT) \ vm_trace.$(OBJEXT) \ ujit_asm.$(OBJEXT) \ - ujit_asm_tests.$(OBJEXT) \ $(COROUTINE_OBJ) \ $(DTRACE_OBJ) \ $(BUILTIN_ENCOBJS) \ diff --git a/test_asm.sh b/test_asm.sh index 446dd36d65..6a48bf5112 100755 --- a/test_asm.sh +++ b/test_asm.sh @@ -1,6 +1,10 @@ https://github.com/ruby/ruby/blob/trunk/test_asm.sh#L1 # NOTE: I did not know what would be the sensible way to compile # and run these tests from the Ruby makefile -clang -std=c99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test +clear + +clang -std=gnu99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test ./asm_test + +rm asm_test diff --git a/ujit_asm.c b/ujit_asm.c index bb6ad29a5a..5afea1a0c2 100644 --- a/ujit_asm.c +++ b/ujit_asm.c @@ -11,6 +11,24 @@ https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L11 // TODO: give ujit_examples.h some more meaningful file name #include "ujit_examples.h" +// 64-bit GP registers +const x86opnd_t RAX = { OPND_REG, 64, .reg = { REG_GP, 0 }}; +const x86opnd_t RCX = { OPND_REG, 64, .reg = { REG_GP, 1 }}; +const x86opnd_t RDX = { OPND_REG, 64, .reg = { REG_GP, 2 }}; +const x86opnd_t RBX = { OPND_REG, 64, .reg = { REG_GP, 3 }}; +const x86opnd_t RSP = { OPND_REG, 64, .reg = { REG_GP, 4 }}; +const x86opnd_t RBP = { OPND_REG, 64, .reg = { REG_GP, 5 }}; +const x86opnd_t RSI = { OPND_REG, 64, .reg = { REG_GP, 6 }}; +const x86opnd_t RDI = { OPND_REG, 64, .reg = { REG_GP, 7 }}; +const x86opnd_t R8 = { OPND_REG, 64, .reg = { REG_GP, 8 }}; +const x86opnd_t R9 = { OPND_REG, 64, .reg = { REG_GP, 9 }}; +const x86opnd_t R10 = { OPND_REG, 64, .reg = { REG_GP, 10 }}; +const x86opnd_t R11 = { OPND_REG, 64, .reg = { REG_GP, 11 }}; +const x86opnd_t R12 = { OPND_REG, 64, .reg = { REG_GP, 12 }}; +const x86opnd_t R13 = { OPND_REG, 64, .reg = { REG_GP, 13 }}; +const x86opnd_t R14 = { OPND_REG, 64, .reg = { REG_GP, 14 }}; +const x86opnd_t R15 = { OPND_REG, 64, .reg = { REG_GP, 15 }}; + void cb_init(codeblock_t* cb, size_t mem_size) { // Map the memory as executable @@ -36,6 +54,15 @@ void cb_init(codeblock_t* cb, size_t mem_size) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L54 cb->num_refs = 0; } +/** +Set the current write position +*/ +void cb_set_pos(codeblock_t* cb, size_t pos) +{ + assert (pos < cb->mem_size); + cb->write_pos = pos; +} + // Get a direct pointer into the executable memory block uint8_t* cb_get_ptr(codeblock_t* cb, size_t index) { @@ -128,8 +155,27 @@ void cb_write_epilogue(codeblock_t* cb) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L155 cb_write_byte(cb, ujit_post_call_bytes[i]); } +// Check if an operand needs a rex byte to be encoded +bool rex_needed(x86opnd_t opnd) +{ + if (opnd.type == OPND_REG) + { + return ( + opnd.reg.reg_no > 7 || + (opnd.num_bits == 8 && opnd.reg.reg_no >= 4 && opnd.reg.reg_no <= 7) + ); + } + + if (opnd.type == OPND_MEM) + { + return (opnd.mem.base_reg_no > 7) || (opnd.mem.has_idx && opnd.mem.idx_reg_no > 7); + } + + assert (false); +} + // Write the REX byte -void writeREX( +static void cb_write_rex( codeblock_t* cb, bool w_flag, uint8_t reg_no, @@ -153,13 +199,12 @@ void writeREX( https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L199 } // Write an opcode byte with an embedded register operand -/*static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, X86Reg rOpnd) +static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, x86opnd_t reg) { // Write the reg field into the opcode byte - uint8_t op_byte = opcode | (rOpnd.regNo & 7); + uint8_t op_byte = opcode | (reg.reg.reg_no & 7); cb_write_byte(cb, op_byte); } -*/ // nop - Noop, one or multiple bytes long void nop(codeblock_t* cb, size_t length) @@ -228,28 +273,35 @@ void nop(codeblock_t* cb, size_t length) https://github.com/ruby/ruby/blob/trunk/ujit_asm.c#L273 } } -/* /// push - Push a register on the stack -void push(codeblock_t* cb, X86Reg reg) +void push(codeblock_t* cb, x86opnd_t reg) { - assert (reg.size is 64, "can only push 64-bit registers"); + assert (reg.num_bits == 64); //cb.writeASM("push", reg); - if (reg.rexNeeded) - cb_write_rex(cb, false, 0, 0, reg.regNo); - cb_write_byte(cb, 0x50, reg); + if (rex_needed(reg)) + cb_write_rex(cb, false, 0, 0, reg.reg.reg_no); + + cb_write_opcode(cb, 0x50, reg); } /// pop - Pop a register off the stack -void pop(codeblock_t* cb, X86Reg reg) +void pop(codeblock_t* cb, x86opnd_t reg) { - assert (reg.size is 64); + assert (reg.num_bits == 64); //cb.writeASM("pop", reg); - if (reg.rexNeeded) - cb_write_rex(false, 0, 0, reg.regNo); - cb_write_byte(cb, 0x58, reg); + if (rex_needed(reg)) + cb_write_rex(cb, false, 0, 0, reg.reg.reg_no); + + cb_write_opcode(cb, 0x58, reg); +} + +/// ret - Return from call, popping only the return address +void ret(codeblock_t* cb) +{ + //cb.writeASM("ret"); + cb_write_byte(cb, 0xC3); } -*/ diff --git a/ujit_asm.h b/ujit_asm.h index b192588c69..13f0698916 100644 --- a/ujit_asm.h +++ b/ujit_asm.h @@ -11,9 +11,10 @@ https://github.com/ruby/ruby/blob/trunk/ujit_asm.h#L11 // Maximum number of label references #define MAX_LABEL_REFS 32 +// Reference to an ASM label typedef struct LabelRef { - // Position where the label reference is in the code block + // Position in the code block where the label reference exists size_t pos; // Label which this refers to @@ -21,6 +22,7 @@ typedef struct LabelRef https://github.com/ruby/ruby/blob/trunk/ujit_asm.h#L22 } labelref_t; +// Block of executable memory into which instructions can be written typedef struct CodeBlock { // Memory block @@ -51,15 +53,101 @@ typedef struct CodeBlock https://github.com/ruby/ruby/blob/trunk/ujit_asm.h#L53 } codeblock_t; +enum OpndType +{ + OPND_NONE, + OPND_REG, + OPND_IMM, + OPND_MEM, + OPND_IPREL +}; + +enum RegType +{ + REG_GP, + REG_FP, + REG_XMM, + REG_IP +}; + +typedef struct X86Reg +{ + // Register type + uint8_t reg_type; + + // Register index number + uint8_t reg_no; + +} x86reg_t; + +typedef struct X86Mem +{ + /// Base register number + uint8_t base_reg_no; + + /// Index register number + uint8_t idx_reg_no; + + /// SIB scale exponent value (power of two, two bits) + uint8_t scale_exp; + + /// Has index register flag + bool has_idx; + + // FIXME: do we need this, or can base reg just be RIP? + /// IP-relative addressing flag + bool is_iprel; + + /// Constant displacement from the base, not scaled + int32_t disp; + +} x86mem_t; + typedef struct X86Opnd { + // Operand type + uint8_t type; + // Size in bits + uint16_t num_bits; + union + { + // Register operand + x86reg_t reg; + // Memory operand + x86mem_t mem; + + // Signed immediate value + int64_t imm; + + // Unsigned immediate value + uint64_t unsgImm; + }; } x86opnd_t; +// 64-bit GP registers +const x86opnd_t RAX; +const x86opnd_t RCX; +const x86opnd_t RDX; +const x86opnd_t RBX; +const x86opnd_t RBP; +const x86opnd_t RSP; +const x86opnd_t RSI; +const x86opnd_t RDI; +const x86opnd_t R8; +const x86opnd_t R9; +const x86opnd_t R10; +const x86opnd_t R11; +const x86opnd_t R12; +const x86opnd_t R13; +const x86opnd_t R14; +const x86opnd_t R15; + void cb_init(codeblock_t* cb, size_t mem_size); +void cb_set_pos(codeblock_t* cb, size_t pos); uint8_t* cb_get_ptr(codeblock_t* cb, size_t index); void cb_write_byte(codeblock_t* cb, uint8_t byte); void cb_write_bytes(codeblock_t* cb, size_t num_bytes, ...); @@ -69,7 +157,13 @@ void cb_write_int(codeblock_t* cb, uint64_t val, size_t num_bits); https://github.com/ruby/ruby/blob/trunk/ujit_asm.h#L157 void cb_write_prologue(codeblock_t* cb); void cb_write_epilogue(codeblock_t* cb); +// Encode individual instructions into a code block void nop(codeblock_t* cb, size_t length); +void push(codeblock_t* cb, x86opnd_t reg); +void pop(codeblock_t* cb, x86opnd_t reg); +void ret(codeblock_t* cb); + + diff --git a/ujit_asm_tests.c b/ujit_asm_tests.c index fc8005e56e..6cc5f1e16a 100644 --- a/ujit_asm_tests.c +++ b/ujit_asm_tests.c @@ -1,27 +1,68 @@ https://github.com/ruby/ruby/blob/trunk/ujit_asm_tests.c#L1 #include <stdio.h> #include <stdlib.h> +#include <string.h> +#include <assert.h> #include "ujit_asm.h" -//fprintf(stderr, format); -//exit(-1) - -// TODO: make a macro to test encoding sequences -// ***You can use sizeof to know the length*** -// CHECK_BYTES(cb, {}) - - - - +// Check that the code block contains the given sequence of bytes +void check_bytes(codeblock_t* cb, const char* bytes) +{ + printf("checking encoding: %s\n", bytes); + + size_t len = strlen(bytes); + assert (len % 2 == 0); + size_t num_bytes = len / 2; + + if (cb->write_pos != num_bytes) + { + fprintf(stderr, "incorrect encoding length %ld\n", cb->write_pos); + exit(-1); + } + + for (size_t i = 0; i < num_bytes; ++i) + { + char byte_str[] = {0, 0, 0, 0}; + strncpy(byte_str, bytes + (2 * i), 2); (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/