ruby-changes:74063
From: Takashi Kokubun <ko1@a...>
Date: Tue, 18 Oct 2022 02:46:15 +0900 (JST)
Subject: [ruby-changes:74063] 64c52c4282 (master): YJIT: Interleave inline and outlined code blocks (#6460)
https://git.ruby-lang.org/ruby.git/commit/?id=64c52c4282

From 64c52c428285e7930aed62740cc9c54ee483178e Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@g...>
Date: Mon, 17 Oct 2022 10:45:59 -0700
Subject: YJIT: Interleave inline and outlined code blocks (#6460)

Co-authored-by: Alan Wu <alansi.xingwu@s...>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@s...>
---
 yjit/src/asm/mod.rs            | 240 ++++++++++++++++++++++++++++++++++-------
 yjit/src/backend/arm64/mod.rs  | 103 +++++++++++-------
 yjit/src/backend/ir.rs         |  17 +--
 yjit/src/backend/tests.rs      |   6 +-
 yjit/src/backend/x86_64/mod.rs |  36 +++++--
 yjit/src/codegen.rs            |  80 +++++---------
 yjit/src/core.rs               |  12 ++-
 yjit/src/options.rs            |  36 ++++++-
 yjit/src/utils.rs              |   9 +-
 9 files changed, 378 insertions(+), 161 deletions(-)

diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 8356201ba6..1ab813964c 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -1,9 +1,20 @@ https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L1
+use std::cell::RefCell;
+use std::cmp;
 use std::fmt;
 use std::mem;
+use std::rc::Rc;
+#[cfg(target_arch = "x86_64")]
+use crate::backend::x86_64::JMP_PTR_BYTES;
+#[cfg(target_arch = "aarch64")]
+use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::backend::ir::Assembler;
+use crate::backend::ir::Target;
+use crate::virtualmem::WriteError;
 #[cfg(feature = "asm_comments")]
 use std::collections::BTreeMap;
+use crate::codegen::CodegenGlobals;
 use crate::virtualmem::{VirtualMem, CodePtr};
 
 // Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@@ -17,7 +28,8 @@ pub mod arm64; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L28
 //
 
 /// Reference to an ASM label
-struct LabelRef {
+#[derive(Clone)]
+pub struct LabelRef {
     // Position in the code block where the label reference exists
     pos: usize,
 
@@ -36,7 +48,7 @@ struct LabelRef { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L48
 /// Block of memory into which instructions can be assembled
 pub struct CodeBlock {
     // Memory for storing the encoded instructions
-    mem_block: VirtualMem,
+    mem_block: Rc<RefCell<VirtualMem>>,
 
     // Memory block size
     mem_size: usize,
@@ -44,6 +56,12 @@ pub struct CodeBlock { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L56
     // Current writing position
     write_pos: usize,
 
+    // Size of a code page (inlined + outlined)
+    page_size: usize,
+
+    // Size reserved for writing a jump to the next page
+    page_end_reserve: usize,
+
     // Table of registered label addresses
     label_addrs: Vec<usize>,
 
@@ -58,7 +76,6 @@ pub struct CodeBlock { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L76
     asm_comments: BTreeMap<usize, Vec<String>>,
 
     // True for OutlinedCb
-    #[cfg(feature = "disasm")]
     pub outlined: bool,
 
     // Set if the CodeBlock is unable to output some instructions,
@@ -67,27 +84,158 @@ pub struct CodeBlock { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L84
     dropped_bytes: bool,
 }
 
+/// Set of CodeBlock label states. Used for recovering the previous state.
+pub struct LabelState {
+    label_addrs: Vec<usize>,
+    label_names: Vec<String>,
+    label_refs: Vec<LabelRef>,
+}
+
 impl CodeBlock {
     /// Make a new CodeBlock
-    pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
-        Self {
-            mem_size: mem_block.virtual_region_size(),
+    pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
+        let mem_size = mem_block.borrow().virtual_region_size();
+        let mut cb = Self {
             mem_block,
+            mem_size,
             write_pos: 0,
+            page_size,
+            page_end_reserve: JMP_PTR_BYTES,
             label_addrs: Vec::new(),
             label_names: Vec::new(),
             label_refs: Vec::new(),
             #[cfg(feature = "asm_comments")]
             asm_comments: BTreeMap::new(),
-            #[cfg(feature = "disasm")]
             outlined,
             dropped_bytes: false,
+        };
+        cb.write_pos = cb.page_start();
+        cb
+    }
+
+    /// Move the CodeBlock to the next page. If it's on the furthest page,
+    /// move the other CodeBlock to the next page as well.
+    pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
+        let old_write_ptr = self.get_write_ptr();
+        self.set_write_ptr(base_ptr);
+        self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+
+        // Move self to the next page
+        let next_page_idx = self.write_pos / self.page_size + 1;
+        if !self.set_page(next_page_idx, &jmp_ptr) {
+            self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
+            return false;
+        }
+
+        // Move the other CodeBlock to the same page if it's on the furthest page
+        self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+
+        return !self.dropped_bytes;
+    }
+
+    /// Move the CodeBlock to page_idx only if it's not going backwards.
+    fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
+        // Do not move the CodeBlock if page_idx points to an old position so that this
+        // CodeBlock will not overwrite existing code.
+        //
+        // Let's say this is the current situation:
+        //   cb:  [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
+        //
+        // When cb needs to patch page1, this will be temporarily changed to:
+        //   cb:  [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
+        //
+        // While patching page1, cb may need to jump to page2. What set_page currently does is:
+        //   cb:  [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
+        // instead of:
+        //   cb:  [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
+        // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
+        // write_pos point to existing code in page2, which might let ocb overwrite it later.
+        //
+        // We could remember the last write_pos in page2 and let set_page use that position,
+        // but you need to waste some space for keeping write_pos for every single page.
+        // It doesn't seem necessary for performance either. So we're currently not doing it.
+        let mut dst_pos = self.page_size * page_idx + self.page_start();
+        if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
+            // Reset dropped_bytes
+            self.dropped_bytes = false;
+
+            // Convert dst_pos to dst_ptr
+            let src_pos = self.write_pos;
+            self.write_pos = dst_pos;
+            let dst_ptr = self.get_write_ptr();
+            self.write_pos = src_pos;
+
+            // Generate jmp_ptr from src_pos to dst_pos
+            self.without_page_end_reserve(|cb| {
+                cb.add_comment("jump to next page");
+                jmp_ptr(cb, dst_ptr);
+                assert!(!cb.has_dropped_bytes());
+            });
+
+            // Start the next code from dst_pos
+            self.write_pos = dst_pos;
         }
+        !self.dropped_bytes
+    }
+
+    /// write_pos of the current page start
+    pub fn page_start_pos(&self) -> usize {
+        self.get_write_pos() / self.page_size * self.page_size + self.page_start()
+    }
+
+    /// Offset of each page where CodeBlock should start writing
+    pub fn page_start(&self) -> usize {
+        let mut start = if self.inline() {
+            0
+        } else {
+            self.page_size / 2
+        };
+        if cfg!(debug_assertions) && !cfg!(test) {
+            // Leave illegal instructions at the beginning of each page to assert
+            // we're not accidentally crossing page boundaries.
+            start += JMP_PTR_BYTES;
+        }
+        start
+    }
+
+    /// Offset of each page where CodeBlock should stop writing (exclusive)
+    pub fn page_end(&self) -> usize {
+        let page_end = if self.inline() {
+            self.page_size / 2
+        } else {
+            self.page_size
+        };
+        page_end - self.page_end_reserve // reserve space to jump to the next page
+    }
+
+    /// Call a given function with page_end_reserve = 0
+    pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
+        let old_page_end_reserve = self.page_end_reserve;
+        self.page_end_reserve = 0;
+        block(self);
+        self.page_end_reserve = old_page_end_reserve;
+    }
+
+    /// Return the address ranges of a given address range that this CodeBlock can write.
+    pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
+        let mut addrs = vec![];
+        let mut start = start_ptr.raw_ptr() as usize;
+        let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
+        let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
+        while start < end {
+            let current_page = start / self.page_size * self.page_size;
+            let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
+            addrs.push((start, page_end));
+            start = current_page + self.page_size + self.page_start();
+        }
+        addrs
     }
 
     /// Check if this code block has sufficient remaining capacity
     pub fn has_capacity(&self, num_bytes: usize) -> bool {
-        self.write_pos + num_bytes < self.mem_size
+        let page_offset = self.write_pos % self.page_size;
+        let capacity = self.page_end().saturating_sub(page_offset);
+        num_bytes <= capacity
     }
 
     /// Add an assembly comment if the feature is on.
@ (... truncated)

-- 
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/
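[Appended illustration, not part of the commit.] To make the page-splitting arithmetic above easier to follow, here is a minimal standalone sketch of how each code page is divided into an inline half and an outlined half, with a few bytes reserved at the end of each half for the jump to the next page. The constant value and the free functions below are hypothetical simplifications of CodeBlock::page_start, page_end, and has_capacity; the debug-build padding that page_start adds in the real code is omitted.

// Illustrative sketch only; names and sizes are assumptions, not the commit's API.

/// Hypothetical size reserved for a "jump to the next page" instruction.
const JMP_PTR_BYTES: usize = 6;

/// First writable offset within a page for the inline (false) or outlined (true) half.
fn page_start(page_size: usize, outlined: bool) -> usize {
    if outlined { page_size / 2 } else { 0 }
}

/// One-past-last writable offset, leaving room to emit the jump to the next page.
fn page_end(page_size: usize, outlined: bool) -> usize {
    let end = if outlined { page_size } else { page_size / 2 };
    end - JMP_PTR_BYTES
}

/// Capacity check in the spirit of the new has_capacity: measure from the offset
/// within the current page rather than from the start of the whole memory region.
fn has_capacity(write_pos: usize, page_size: usize, outlined: bool, num_bytes: usize) -> bool {
    let page_offset = write_pos % page_size;
    num_bytes <= page_end(page_size, outlined).saturating_sub(page_offset)
}

fn main() {
    let page_size = 4096; // hypothetical page size for the example
    for &outlined in &[false, true] {
        println!(
            "{} half writes [{:#x}, {:#x})",
            if outlined { "outlined" } else { "inline" },
            page_start(page_size, outlined),
            page_end(page_size, outlined),
        );
    }
    // The inline half of a 4 KiB page runs out of room just before the 2 KiB boundary.
    assert!(has_capacity(0x100, page_size, false, 16));
    assert!(!has_capacity(0x7f0, page_size, false, 16));
}

Under these assumptions, a 4 KiB page gives the inline block the range [0x0, 0x7fa) and the outlined block [0x800, 0xffa), which mirrors why next_page has to advance both CodeBlocks together: both halves of a page are either live or exhausted as a pair.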