ruby-changes:73052
From: Noah <ko1@a...>
Date: Thu, 25 Aug 2022 02:43:01 +0900 (JST)
Subject: [ruby-changes:73052] b4be3c00c5 (master): add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332)
https://git.ruby-lang.org/ruby.git/commit/?id=b4be3c00c5 From b4be3c00c5737649166db676278fd28f768a5e3c Mon Sep 17 00:00:00 2001 From: Noah Gibbs <the.codefolio.guy@g...> Date: Thu, 28 Jul 2022 16:45:08 +0100 Subject: add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332) --- yjit.c | 12 ++++++ yjit/bindgen/src/main.rs | 7 ++++ yjit/src/core.rs | 27 ++++++++++++++ yjit/src/cruby_bindings.inc.rs | 26 +++++++++++++ yjit/src/disasm.rs | 84 ++++++++++++++++++++++-------------------- yjit/src/options.rs | 20 +++++++++- yjit/src/utils.rs | 35 ++++++++++++++++++ 7 files changed, 170 insertions(+), 41 deletions(-) diff --git a/yjit.c b/yjit.c index 1a2f71a959..0dddcfdc5a 100644 --- a/yjit.c +++ b/yjit.c @@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str) https://github.com/ruby/ruby/blob/trunk/yjit.c#L399 return LONG2NUM(RSTRING_LEN(str)); } +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + // This is defined only as a named struct inside rb_iseq_constant_body. // By giving it a separate typedef, we make it nameable by rust-bindgen. // Bindgen's temp/anon name isn't guaranteed stable. diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index f54addc795..f8d87aeec8 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -70,6 +70,9 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L70 .allowlist_function("rb_str_buf_append") .allowlist_function("rb_str_dup") + // From encindex.h + .allowlist_type("ruby_preserved_encindex") + // This struct is public to Ruby C extensions // From include/ruby/internal/core/rbasic.h .allowlist_type("RBasic") @@ -240,6 +243,7 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L243 .allowlist_var("VM_ENV_DATA_INDEX_SPECVAL") .allowlist_var("VM_ENV_DATA_INDEX_FLAGS") .allowlist_var("VM_ENV_DATA_SIZE") + .allowlist_function("rb_iseq_path") // From yjit.c .allowlist_function("rb_iseq_(get|set)_yjit_payload") @@ -265,6 +269,8 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L269 .allowlist_function("rb_yjit_for_each_iseq") .allowlist_function("rb_yjit_obj_written") .allowlist_function("rb_yjit_str_simple_append") + .allowlist_function("rb_RSTRING_PTR") + .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") .allowlist_function("rb_yjit_exit_locations_dict") @@ -282,6 +288,7 @@ fn main() { https://github.com/ruby/ruby/blob/trunk/yjit/bindgen/src/main.rs#L288 .allowlist_function("rb_vm_insn_addr2opcode") .allowlist_function("rb_iseqw_to_iseq") .allowlist_function("rb_iseq_each") + .allowlist_function("rb_iseq_method_name") // From builtin.h .allowlist_type("rb_builtin_function.*") diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 64585653d9..cb026f6a3b 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -6,6 +6,8 @@ use crate::cruby::*; https://github.com/ruby/ruby/blob/trunk/yjit/src/core.rs#L6 use crate::options::*; use crate::stats::*; use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; use core::ffi::c_void; use std::cell::*; use std::hash::{Hash, Hasher}; @@ -1426,6 +1428,20 @@ fn gen_block_series_body( https://github.com/ruby/ruby/blob/trunk/yjit/src/core.rs#L1428 last_blockref = new_blockref; } + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq); + if iseq_location.contains(substr) { + let last_block = last_blockref.borrow(); + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx); + println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx)); + } + } + } + Some(first_block) } @@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) { https://github.com/ruby/ruby/blob/trunk/yjit/src/core.rs#L1972 verify_blockid(block.blockid); + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(block.blockid.iseq); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx); + } + } + } + // Remove this block from the version array remove_block_version(blockref); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 31f09ef98d..a329dadc9b 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22; https://github.com/ruby/ruby/blob/trunk/yjit/src/cruby_bindings.inc.rs#L246 pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608; pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42; pub type ruby_encoding_consts = u32; +pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0; +pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1; +pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2; +pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3; +pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4; +pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5; +pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6; +pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7; +pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8; +pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9; +pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; +pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; +pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; +pub type ruby_preserved_encindex = u32; extern "C" { pub fn rb_obj_info_dump(obj: VALUE); } @@ -649,6 +663,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; https://github.com/ruby/ruby/blob/trunk/yjit/src/cruby_bindings.inc.rs#L663 pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; +extern "C" { + pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; } @@ -969,6 +986,9 @@ extern "C" { https://github.com/ruby/ruby/blob/trunk/yjit/src/cruby_bindings.inc.rs#L986 extern "C" { pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; } +extern "C" { + pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE; +} extern "C" { pub fn rb_vm_barrier(); } @@ -1020,6 +1040,12 @@ extern "C" { https://github.com/ruby/ruby/blob/trunk/yjit/src/cruby_bindings.inc.rs#L1040 extern "C" { pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; } +extern "C" { + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; +} +extern "C" { + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; +} pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; extern "C" { pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool; diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs index 2082648c4a..83c80d6c66 100644 --- a/yjit/src/disasm.rs +++ b/yjit/src/disasm.rs @@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU https://github.com/ruby/ruby/blob/trunk/yjit/src/disasm.rs#L26 // Get the iseq pointer from the wrapper let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; - let out_string = disasm_iseq(iseq); + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. + let out_string = disasm_iseq_insn_range(iseq, 0, 9999); return rust_str_to_ruby(&out_string); } } #[cfg(feature = "disasm")] -fn disasm_iseq(iseq: IseqPtr) -> String { - let mut out = String::from(""); +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String { + let mut out = String::from(""); // Get a list of block versions generated for this iseq let mut block_list = get_iseq_block_list(iseq); @@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String { https://github.com/ruby/ruby/blob/trunk/yjit/src/disasm.rs#L86 for block_idx in 0..block_list.len() { let block = block_list[block_idx].borrow(); let blockid = block.get_blockid(); - let end_idx = block.get_end_idx(); - let start_addr = block.get_start_addr().unwrap().raw_ptr(); - let end_addr = block.get_end_addr().unwrap().raw_ptr(); - let code_size = block.code_size(); - - // Write some info about the current block - let block_ident = format!( - "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", - block_idx + 1, - block_list.len(), - blockid.idx, - end_idx, - code_size - ); - out.push_str(&format!("== {:=<60}\n", block_ident)); - - // Disassemble the instructio (... truncated) -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/