ruby-changes:69958

https://git.ruby-lang.org/ruby.git/commit/?id=b5b6ab4194

From b5b6ab4194f16e96ee5004288cc469ac1bca41a3 Mon Sep 17 00:00:00 2001
From: Alan Wu <XrXr@u...>
Date: Fri, 26 Nov 2021 18:00:42 -0500
Subject: YJIT: Add ability to exit to interpreter from stubs

Previously, YJIT assumed that it's always possible to generate a new
basic block when servicing a stub in branch_stub_hit(). When YJIT is out
of executable memory, for example, this assumption doesn't hold up.

Add handling to branch_stub_hit() for servicing stubs without consuming
more executable memory by adding a code path that exits to the
interpreter at the location the branch stub represents. The new code
path reconstructs interpreter state in branch_stub_hit() and then exits
with a new snippet called `code_for_exit_from_stub` that returns
`Qundef` from the YJIT native stack frame.

As this change adds another place where we regenerate code from
`branch_t`, extract the logic for it into a new function and call it
regenerate_branch(). While we are at it, make the branch shrinking code
path in branch_stub_hit() more explicit.

This new functionality is hard to test without full support for
out-of-memory conditions. To verify this change, I ran
`RUBY_YJIT_ENABLE=1 make check -j12` with the following patch to stress
test the new code path:

```diff
diff --git a/yjit_core.c b/yjit_core.c
index 4ab63d9806..5788b8c5ed 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -878,8 +878,12 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L878
                 cb_set_write_ptr(cb, branch->end_addr);
             }

+if (rand() < RAND_MAX/2) {
             // Compile the new block version
             p_block = gen_block_version(target, target_ctx, ec);
+}else{
+    p_block = NULL;
+}

             if (!p_block && branch_modified) {
                 // We couldn't generate a new block for the branch, but we modified the branch.
```

We can enable the new test along with other OOM tests once full support
lands.

Other small changes:
 * yjit_utils.c (print_str): Update to work with new native frame shape.
       Follow up for 8fa0ee4d404.
 * yjit_iface.c (rb_yjit_init): Run yjit_init_core() after
       yjit_init_codegen() so `cb` and `ocb` are available.
---
 bootstraptest/test_yjit.rb |  18 ++++++
 yjit.c                     |   2 +
 yjit_codegen.c             |  20 ++++++
 yjit_codegen.h             |   2 +
 yjit_core.c                | 152 +++++++++++++++++++++++++++++++--------------
 yjit_core.h                |   6 +-
 yjit_iface.c               |   5 +-
 yjit_utils.c               |   4 --
 8 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
index 28fe9446ecc..ee8991833cb 100644
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2434,6 +2434,24 @@ assert_equal 'ok', %q{ https://github.com/ruby/ruby/blob/trunk/bootstraptest/test_yjit.rb#L2434
   A.new.use 1
 }
 
+assert_equal 'ok', %q{
+  # test hitting a branch stub when out of memory
+  def nimai(jita)
+    if jita
+      :ng
+    else
+      :ok
+    end
+  end
+
+  nimai(true)
+  nimai(true)
+
+  RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+  nimai(false)
+} if false  # disabled for now since OOM crashes in the test harness
+
 # block invalidation while out of memory
 assert_equal 'new', %q{
   def foo
diff --git a/yjit.c b/yjit.c
index 33517ca36df..56173a13604 100644
--- a/yjit.c
+++ b/yjit.c
@@ -123,6 +123,8 @@ YJIT_DECLARE_COUNTERS( https://github.com/ruby/ruby/blob/trunk/yjit.c#L123
     compiled_iseq_count,
     compiled_block_count,
 
+    exit_from_branch_stub,
+
     invalidation_count,
     invalidate_method_lookup,
     invalidate_bop_redefined,
diff --git a/yjit_codegen.c b/yjit_codegen.c
index 26362a7064f..2cd4fd2bda0 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -382,6 +382,26 @@ yjit_gen_leave_exit(codeblock_t *cb) https://github.com/ruby/ruby/blob/trunk/yjit_codegen.c#L382
     return code_ptr;
 }
 
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+static void
+gen_code_for_exit_from_stub(void)
+{
+    codeblock_t *cb = ocb; // the snippet is written into ocb, not the inline cb
+    code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos); // start of the snippet
+
+    GEN_COUNTER_INC(cb, exit_from_branch_stub); // NOTE(review): presumably emits the counter increment into cb
+
+    pop(cb, REG_SP);  // NOTE(review): presumably undoes the pushes made on entry to JIT code -- confirm order
+    pop(cb, REG_EC);
+    pop(cb, REG_CFP);
+
+    mov(cb, RAX, imm_opnd(Qundef)); // return Qundef from the YJIT native stack frame
+    ret(cb);
+}
+
 // :side-exit:
 // Get an exit for the current instruction in the outlined block. The code
 // for each instruction often begins with several guards before proceeding
diff --git a/yjit_codegen.h b/yjit_codegen.h
index 4ae2536423f..bbd29e671b8 100644
--- a/yjit_codegen.h
+++ b/yjit_codegen.h
@@ -16,6 +16,8 @@ static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq); https://github.com/ruby/ruby/blob/trunk/yjit_codegen.h#L16
 
 static void yjit_gen_block(block_t *block, rb_execution_context_t *ec);
 
+static void gen_code_for_exit_from_stub(void);
+
 static void yjit_init_codegen(void);
 
 #endif // #ifndef YJIT_CODEGEN_H
diff --git a/yjit_core.c b/yjit_core.c
index 32e0575d75d..4460d325fc3 100644
--- a/yjit_core.c
+++ b/yjit_core.c
@@ -9,6 +9,10 @@ https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L9
 #include "yjit_core.h"
 #include "yjit_codegen.h"
 
+// For exiting from YJIT frame from branch_stub_hit().
+// Filled by gen_code_for_exit_from_stub().
+static uint8_t *code_for_exit_from_stub = NULL;
+
 /*
 Get an operand for the adjusted stack pointer address
 */
@@ -597,6 +601,52 @@ add_block_version(blockid_t blockid, block_t *block) https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L601
 #endif
 }
 
+static ptrdiff_t
+branch_code_size(const branch_t *branch)
+{
+    return branch->end_addr - branch->start_addr; // distance between the branch's start and end addresses
+}
+
+// Regenerate the code for a branch at branch->start_addr, possibly changing its size
+static void
+regenerate_branch(codeblock_t *cb, branch_t *branch)
+{
+    if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+        // Generating this branch would modify frozen bytes. Do nothing.
+        return;
+    }
+
+    const uint32_t old_write_pos = cb->write_pos; // saved so the cursor can be restored below
+    const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;
+
+    RUBY_ASSERT(branch->dst_addrs[0] != NULL);
+
+    cb_set_write_ptr(cb, branch->start_addr); // rewind the cursor to overwrite the branch in place
+    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
+    branch->end_addr = cb_get_write_ptr(cb); // the branch now ends wherever gen_fn stopped writing
+
+    if (branch_terminates_block) {
+        // The branch was the tail of its block, so the block ends where the branch ends.
+        branch->block->end_addr = branch->end_addr;
+    }
+
+    // cb->write_pos is both a write cursor and a marker for the end of
+    // everything written out so far. Leave cb->write_pos at the end of the
+    // block before returning. This function only ever bumps or retains the
+    // end-of-block marker since that's what the majority of callers want. When
+    // the branch sits at the very end of the codeblock and it shrinks after
+    // regeneration, it's up to the caller to drop bytes off the end to
+    // not leave a gap and implement branch->shape.
+    if (old_write_pos > cb->write_pos) {
+        // We rewound cb->write_pos to generate the branch, now restore it.
+        cb_set_pos(cb, old_write_pos);
+    }
+    else {
+        // The branch sits at the end of cb and consumed some memory.
+        // Keep cb->write_pos.
+    }
+}
+
 // Create a new outgoing branch entry for a block
 static branch_t*
 make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
@@ -777,13 +827,15 @@ gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L827
 static uint8_t *
 branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
 {
-    uint8_t *dst_addr;
+    uint8_t *dst_addr = NULL;
 
     // Stop other ractors since we are going to patch machine code.
     // This is how the GC does it.
     RB_VM_LOCK_ENTER();
     rb_vm_barrier();
 
+    const ptrdiff_t branch_size_on_entry = branch_code_size(branch);
+
     RUBY_ASSERT(branch != NULL);
     RUBY_ASSERT(target_idx < 2);
     blockid_t target = branch->targets[target_idx];
@@ -794,18 +846,13 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L846
     if (branch->blocks[target_idx]) {
         dst_addr = branch->dst_addrs[target_idx];
     }
-    else
-    {
-        //fprintf(stderr, "\nstub hit, branch: %p, target idx: %d\n", branch, target_idx);
-        //fprintf(stderr, "blockid.iseq=%p, blockid.idx=%d\n", target.iseq, target.idx);
-        //fprintf(stderr, "chain_depth=%d\n", target_ctx->chain_depth);
-
+    else {
         // :stub-sp-flush:
         // Generated code do stack operations without modifying cfp->sp, while the
         // cfp->sp tells the GC what values on the stack to root. Generated code
         // generally takes care of updating cfp->sp when it calls runtime routines that
-        // could trigger GC, but for the case of branch stubs, it's inconvenient. So
-        // we do it here.
+        // could trigger GC, but it's inconvenient to do it before calling this function.
+        // So we do it here instead.
         VALUE *const original_interp_sp = ec->cfp->sp;
         ec->cfp->sp += target_ctx->sp_offset;
 
@@ -818,8 +865,11 @@ branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_contex https://github.com/ruby/ruby/blob/trunk/yjit_core.c#L865
 
         // If this block hasn't yet been compiled
         if (!p_block) {
+            const uint8_t branch_old_shape = branch->shape;
+            bool branch_modified = false;
+
             // If the new block can be generated right after the  (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/