[前][次][番号順一覧][スレッド一覧]

ruby-changes:72521

From: Eileen <ko1@a...>
Date: Wed, 13 Jul 2022 05:41:04 +0900 (JST)
Subject: [ruby-changes:72521] 59c6b7b7ab (master): Speed up --yjit-trace-exits code (#6106)

https://git.ruby-lang.org/ruby.git/commit/?id=59c6b7b7ab

From 59c6b7b7abefdf8bc93d7117a3893d581f3a6c90 Mon Sep 17 00:00:00 2001
From: "Eileen M. Uchitelle" <eileencodes@u...>
Date: Tue, 12 Jul 2022 16:40:49 -0400
Subject: Speed up --yjit-trace-exits code (#6106)

In a small script the speed of this feature isn't really noticeable but
on Rails it's very noticeable how slow this can be. This PR aims to
speed up two parts of the functionality.

1) The Rust exit recording code

Instead of adding all samples as we see them to the yjit_raw_samples and
yjit_line_samples, we can increment the counter on the ones we've seen
before. This will be faster on traces where we are hitting the same
stack often. In a crude measurement of booting just the active record
base test (`test/cases/base_test.rb`) we found that this improved the
speed by 1 second.

This also results in a smaller marshal dump file which sped up the test
boot time by 4 seconds with trace exits on.

2) The Ruby parsing code

Previously we were allocating new arrays using `shift` and
`each_with_index`. This change avoids allocating new arrays by using an
index. This change saves us the most amount of time, gaining 11 seconds.

Before this change the test boot time took 62 seconds, after it took 47
seconds. This is still too long but it's a step closer to faster
functionality. Next we're going to tackle allowing you to collect trace
exits for a specific instruction. There is also some potential slowness
in the GC code that I'd like to take a second look at.

Co-authored-by: Aaron Patterson <tenderlove@r...>

Co-authored-by: Aaron Patterson <tenderlove@r...>
---
 yjit.c            |  4 ++++
 yjit.rb           | 45 +++++++++++++++++++++++-----------------
 yjit/src/stats.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/yjit.c b/yjit.c
index 108a376282..daf608b106 100644
--- a/yjit.c
+++ b/yjit.c
@@ -107,6 +107,10 @@ rb_yjit_add_frame(VALUE hash, VALUE frame) https://github.com/ruby/ruby/blob/trunk/yjit.c#L107
 
         rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name);
         rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file);
+        rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0));
+        rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
+        rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new());
+        rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new());
 
         if (line != INT2FIX(0)) {
             rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line);
diff --git a/yjit.rb b/yjit.rb
index 15c84d0c61..226f2a8134 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -41,16 +41,11 @@ module RubyVM::YJIT https://github.com/ruby/ruby/blob/trunk/yjit.rb#L41
     frames = results[:frames].dup
     samples_count = 0
 
-    frames.each do |frame_id, frame|
-      frame[:samples] = 0
-      frame[:edges] = {}
-    end
-
     # Loop through the instructions and set the frame hash with the data.
     # We use nonexistent.def for the file name, otherwise insns.def will be displayed
     # and that information isn't useful in this context.
     RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
-      frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil }
+      frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil, lines: {} }
       results[:frames][frame_id] = frame_hash
       frames[frame_id] = frame_hash
     end
@@ -58,12 +53,22 @@ module RubyVM::YJIT https://github.com/ruby/ruby/blob/trunk/yjit.rb#L53
     # Loop through the raw_samples and build the hashes for StackProf.
     # The loop is based off an example in the StackProf documentation and therefore
     # this functionality can only work with that library.
-    while raw_samples.length > 0
-      stack_trace = raw_samples.shift(raw_samples.shift + 1)
-      lines = line_samples.shift(line_samples.shift + 1)
+    #
+    # Raw Samples:
+    # [ length, frame1, frame2, frameN, ..., instruction, count
+    #
+    # Line Samples
+    # [ length, line_1, line_2, line_n, ..., dummy value, count
+    i = 0
+    while i < raw_samples.length
+      stack_length = raw_samples[i] + 1
+      i += 1 # consume the stack length
+
       prev_frame_id = nil
+      stack_length.times do |idx|
+        idx += i
+        frame_id = raw_samples[idx]
 
-      stack_trace.each_with_index do |frame_id, idx|
         if prev_frame_id
           prev_frame = frames[prev_frame_id]
           prev_frame[:edges][frame_id] ||= 0
@@ -71,26 +76,28 @@ module RubyVM::YJIT https://github.com/ruby/ruby/blob/trunk/yjit.rb#L76
         end
 
         frame_info = frames[frame_id]
-        frame_info[:total_samples] ||= 0
         frame_info[:total_samples] += 1
 
-        frame_info[:lines] ||= {}
-        frame_info[:lines][lines[idx]] ||= [0, 0]
-        frame_info[:lines][lines[idx]][0] += 1
+        frame_info[:lines][line_samples[idx]] ||= [0, 0]
+        frame_info[:lines][line_samples[idx]][0] += 1
 
         prev_frame_id = frame_id
       end
 
-      top_frame_id = stack_trace.last
+      i += stack_length # consume the stack
+
+      top_frame_id = prev_frame_id
       top_frame_line = 1
 
-      frames[top_frame_id][:samples] += 1
+      sample_count = raw_samples[i]
+
+      frames[top_frame_id][:samples] += sample_count
       frames[top_frame_id][:lines] ||= {}
       frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
-      frames[top_frame_id][:lines][top_frame_line][1] += 1
+      frames[top_frame_id][:lines][top_frame_line][1] += sample_count
 
-      samples_count += raw_samples.shift
-      line_samples.shift
+      samples_count += sample_count
+      i += 1
     end
 
     results[:samples] = samples_count
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 4118f894b4..4843cecf92 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -432,23 +432,67 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) https://github.com/ruby/ruby/blob/trunk/yjit/src/stats.rs#L432
         // Call frame info is stored in the frames_buffer, line number information
         // in the lines_buffer. The first argument is the start point and the second
         // argument is the buffer limit, set at 2048.
-        let num = unsafe { rb_profile_frames(0, BUFF_LEN as i32, frames_buffer.as_mut_ptr(), lines_buffer.as_mut_ptr()) };
+        let stack_length = unsafe { rb_profile_frames(0, BUFF_LEN as i32, frames_buffer.as_mut_ptr(), lines_buffer.as_mut_ptr()) };
+        let samples_length = (stack_length as usize) + 3;
 
-        let mut i = num - 1;
         let yjit_raw_samples = YjitExitLocations::get_raw_samples();
         let yjit_line_samples = YjitExitLocations::get_line_samples();
 
-        yjit_raw_samples.push(VALUE(num as usize));
-        yjit_line_samples.push(num);
+        // If yjit_raw_samples is less than or equal to the current length of the samples
+        // we might have seen this stack trace previously.
+        if yjit_raw_samples.len() >= samples_length {
+            let prev_stack_len_index = yjit_raw_samples.len() - samples_length;
+            let prev_stack_len = i64::from(yjit_raw_samples[prev_stack_len_index]);
+            let mut idx = stack_length - 1;
+            let mut prev_frame_idx = 0;
+            let mut seen_already = true;
+
+            // If the previous stack lenght and current stack length are equal,
+            // loop and compare the current frame to the previous frame. If they are
+            // not equal, set seen_already to false and break out of the loop.
+            if prev_stack_len == stack_length as i64 {
+                while idx >= 0 {
+                    let current_frame = frames_buffer[idx as usize];
+                    let prev_frame = yjit_raw_samples[prev_stack_len_index + prev_frame_idx + 1];
+
+                    // If the current frame and previous frame are not equal, set
+                    // seen_already to false and break out of the loop.
+                    if current_frame != prev_frame {
+                        seen_already = false;
+                        break;
+                    }
+
+                    idx -= 1;
+                    prev_frame_idx += 1;
+                }
+
+                // If we know we've seen this stack before, increment the counter by 1.
+                if seen_already {
+                    let prev_idx = yjit_raw_samples.len() - 1;
+                    let prev_count = i64::from(yjit_raw_samples[prev_idx]);
+                    let new_count = prev_count + 1;
+
+                    yjit_raw_samples[prev_idx] = VALUE(new_count as usize);
+                    yjit_line_samples[prev_idx] = new_count as i32;
+
+                    return;
+                }
+            }
+        }
+
+        yjit_raw_samples.push(VALUE(stack_length as usize));
+        yjit_line_samples.push(stack_length);
+
+        let mut idx = stack_length - 1;
 
-        while i >= 0 {
-            let frame = frames_buffer[i as usize];
-            let line = lines_buffer[i as usize];
+        while idx >= 0 {
+            let frame = frames_buffer[idx as usize];
+            let line = lines_buffer[idx as usize];
 
             yjit_raw_samples.push(frame);
             yjit_line_samples.push(line);
 
-            i -= 1;
+            idx -= 1;
         }
 
         // Push the insn value into the yjit_raw_samples Vec.
@@ -459,6 +503,8 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) https://github.com/ruby/ruby/blob/trunk/yjit/src/stats.rs#L503
         let line = yjit_line_samples.len() - 1;
         yjit_line_samples.push(line as i32);
 
+        // Push number of times seen onto the stack, which is 1
+        // because it's the first time we've seen it.
         yjit_raw_samples.push(VALUE(1 as usize));
         yjit_line_samples.push(1 (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]