[前][次][番号順一覧][スレッド一覧]

ruby-changes:72160

From: Alan <ko1@a...>
Date: Tue, 14 Jun 2022 23:23:34 +0900 (JST)
Subject: [ruby-changes:72160] 9f09397bfe (master): YJIT: On-demand executable memory allocation; faster boot (#5944)

https://git.ruby-lang.org/ruby.git/commit/?id=9f09397bfe

From 9f09397bfe6762bf19ef47b2f60988e49b80560d Mon Sep 17 00:00:00 2001
From: Alan Wu <XrXr@u...>
Date: Tue, 14 Jun 2022 10:23:13 -0400
Subject: YJIT: On-demand executable memory allocation; faster boot (#5944)

This commit makes YJIT allocate memory for generated code gradually as
needed. Previously, YJIT allocated all the memory it needed on boot in
one go, leading to higher than necessary resident set size (RSS) and
time spent on boot initializing the memory with a large memset().

Users should no longer need to search for a magic number to pass to
`--yjit-exec-mem` since physical memory consumption should now more
accurately reflect the requirement of the workload.

YJIT now reserves a range of addresses on boot. This region starts out
with no access permission at all so buggy attempts to jump to the region
crash like before this change. To get this hardening at finer
granularity than the page size, we fill each page with trapping
instructions when we first allocate physical memory for the page.

Most of the time applications don't need 256 MiB of executable code, so
allocating on-demand ends up doing less total work than before. Case in
point, a simple `ruby --yjit-call-threshold=1 -eitself` takes about
half as long after this change. In terms of memory consumption, here is
a table to give a rough summary of the impact:

    | Peak RSS in MiB | -eitself example | railsbench once |
    | :-------------: | ---------------: | --------------: |
    |     before      |              265 |             377 |
    |      after      |               11 |             143 |
    |     no YJIT     |               10 |             101 |

A new module is introduced to handle allocation bookkeeping.
`CodePtr` is moved into the module since it has a close relationship
with the new `VirtualMem` struct. This new interface has a slightly
smaller surface than before in that marking a region as writable is no
longer a public operation.
---
 yjit.c                         |  51 +++---
 yjit/bindgen/src/main.rs       |   1 +
 yjit/src/asm/mod.rs            | 144 ++++------------
 yjit/src/asm/x86_64/mod.rs     |   2 +-
 yjit/src/asm/x86_64/tests.rs   |   2 +-
 yjit/src/codegen.rs            |  55 +++++-
 yjit/src/core.rs               |  26 ++-
 yjit/src/cruby.rs              |   3 -
 yjit/src/cruby_bindings.inc.rs |   5 +-
 yjit/src/lib.rs                |   1 +
 yjit/src/stats.rs              |   6 +-
 yjit/src/virtualmem.rs         | 376 +++++++++++++++++++++++++++++++++++++++++
 12 files changed, 504 insertions(+), 168 deletions(-)
 create mode 100644 yjit/src/virtualmem.rs

diff --git a/yjit.c b/yjit.c
index 03930706ba..fe9098f0b5 100644
--- a/yjit.c
+++ b/yjit.c
@@ -56,7 +56,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM); https://github.com/ruby/ruby/blob/trunk/yjit.c#L56
 // types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
 // the Rust side.
 //
-// What's up with the long prefix? The "rb_" part is to apease `make leaked-globals`
+// What's up with the long prefix? The "rb_" part is to appease `make leaked-globals`
 // which runs on upstream CI. The rationale for the check is unclear to Alan as
 // we build with `-fvisibility=hidden` so only explicitly marked functions end
 // up as public symbols in libruby.so. Perhaps the check is for the static
@@ -66,13 +66,13 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM); https://github.com/ruby/ruby/blob/trunk/yjit.c#L66
 // The "_yjit_" part is for trying to be informative. We might want different
 // suffixes for symbols meant for Rust and symbols meant for broader CRuby.
 
-void
+bool
 rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
 {
     if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
-        rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n",
-            mem_block, (unsigned long)mem_size, strerror(errno));
+        return false;
     }
+    return true;
 }
 
 void
@@ -209,25 +209,29 @@ align_ptr(uint8_t *ptr, uint32_t multiple) https://github.com/ruby/ruby/blob/trunk/yjit.c#L209
 }
 #endif
 
-// Allocate a block of executable memory
+// Address space reservation. Memory pages are mapped on an as needed basis.
+// See the Rust mm module for details.
 uint8_t *
-rb_yjit_alloc_exec_mem(uint32_t mem_size)
+rb_yjit_reserve_addr_space(uint32_t mem_size)
 {
 #ifndef _WIN32
     uint8_t *mem_block;
 
     // On Linux
     #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
+        uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE);
+        uint8_t *const cfunc_sample_addr = (void *)&rb_yjit_reserve_addr_space;
+        uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX;
         // Align the requested address to page size
-        uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
-        uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size);
+        uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size);
 
+        // Probe for addresses close to this function using MAP_FIXED_NOREPLACE
+        // to improve odds of being in range for 32-bit relative call instructions.
         do {
-            // Try to map a chunk of memory as executable
-            mem_block = (uint8_t*)mmap(
-                (void*)req_addr,
+            mem_block = mmap(
+                req_addr,
                 mem_size,
-                PROT_READ | PROT_EXEC,
+                PROT_NONE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                 -1,
                 0
@@ -240,15 +244,15 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) https://github.com/ruby/ruby/blob/trunk/yjit.c#L244
 
             // +4MB
             req_addr += 4 * 1024 * 1024;
-        } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX);
+        } while (req_addr < probe_region_end);
 
     // On MacOS and other platforms
     #else
         // Try to map a chunk of memory as executable
-        mem_block = (uint8_t*)mmap(
-            (void*)rb_yjit_alloc_exec_mem,
+        mem_block = mmap(
+            (void *)rb_yjit_reserve_addr_space,
             mem_size,
-            PROT_READ | PROT_EXEC,
+            PROT_NONE,
             MAP_PRIVATE | MAP_ANONYMOUS,
             -1,
             0
@@ -258,10 +262,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) https://github.com/ruby/ruby/blob/trunk/yjit.c#L262
     // Fallback
     if (mem_block == MAP_FAILED) {
         // Try again without the address hint (e.g., valgrind)
-        mem_block = (uint8_t*)mmap(
+        mem_block = mmap(
             NULL,
             mem_size,
-            PROT_READ | PROT_EXEC,
+            PROT_NONE,
             MAP_PRIVATE | MAP_ANONYMOUS,
             -1,
             0
@@ -270,17 +274,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) https://github.com/ruby/ruby/blob/trunk/yjit.c#L274
 
     // Check that the memory mapping was successful
     if (mem_block == MAP_FAILED) {
-        perror("mmap call failed");
-        exit(-1);
+        perror("ruby: yjit: mmap:");
+        rb_bug("mmap failed");
     }
 
-    // Fill the executable memory with PUSH DS (0x1E) so that
-    // executing uninitialized memory will fault with #UD in
-    // 64-bit mode.
-    rb_yjit_mark_writable(mem_block, mem_size);
-    memset(mem_block, 0x1E, mem_size);
-    rb_yjit_mark_executable(mem_block, mem_size);
-
     return mem_block;
 #else
     // Windows not supported for now
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 212013d70c..d8f3c98e89 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -239,6 +239,7 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L239
         .allowlist_function("rb_iseq_(get|set)_yjit_payload")
         .allowlist_function("rb_iseq_pc_at_idx")
         .allowlist_function("rb_iseq_opcode_at_pc")
+        .allowlist_function("rb_yjit_reserve_addr_space")
         .allowlist_function("rb_yjit_mark_writable")
         .allowlist_function("rb_yjit_mark_executable")
         .allowlist_function("rb_yjit_get_page_size")
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 1d31facb78..e16e856925 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -3,49 +3,16 @@ use std::mem; https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L3
 #[cfg(feature = "asm_comments")]
 use std::collections::BTreeMap;
 
+use crate::virtualmem::{VirtualMem, CodePtr};
+
 // Lots of manual vertical alignment in there that rustfmt doesn't handle well.
 #[rustfmt::skip]
 pub mod x86_64;
 
-/// Pointer to a piece of machine code
-/// We may later change this to wrap an u32
-/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
-#[repr(C)]
-pub struct CodePtr(*const u8);
-
-impl CodePtr {
-    pub fn raw_ptr(&self) -> *const u8 {
-        let CodePtr(ptr) = *self;
-        return ptr;
-    }
-
-    fn into_i64(&self) -> i64 {
-        let CodePtr(ptr) = self;
-        *ptr as i64
-    }
-
-    #[allow(unused)]
-    fn into_usize(&self) -> usize {
-        let CodePtr(ptr) = self;
-        *ptr as usize
-    }
-}
-
-impl From<*mut u8> for CodePtr {
-    fn from(value: *mut u8) -> Self {
-        assert!(value as usize != 0);
-        return CodePtr(value);
-    }
-}
-
 //
 // TODO: need a field_size_of macro, to compute the size of a struct field in bytes
 //
 
-// 1 is not aligned so this won't match any pages
-const ALIGNED_WRITE_POSITION_NONE: usize = 1;
-
 /// Reference to an ASM label
 struct LabelRef {
     // Position in the code block where the label reference exists
@@ -57,13 +24,8 @@ struct LabelRef { https://github.com/ruby/ruby/blob/trunk/yjit/src/asm/mod.rs#L24
 
 /// Block of memory into which instructions can be assembled
 pub struct CodeBlock {
-    // Block of non-executable memory used for dummy code blocks
-    // This memory is owned by this block and (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]