ruby-changes:55323
From: k0kubun <ko1@a...>
Date: Sun, 14 Apr 2019 13:52:07 +0900 (JST)
Subject: [ruby-changes:55323] k0kubun:r67530 (trunk): Recompile JIT-ed code without optimization
k0kubun 2019-04-14 13:52:02 +0900 (Sun, 14 Apr 2019) New Revision: 67530 https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=67530 Log: Recompile JIT-ed code without the inline-cache-based optimizations when a JIT cancel is caused by them. This feature was in the original MJIT implementation by Vladimir, but it was removed for simplification when MJIT was merged into Ruby. This commit adds the functionality again for the following benchmark: https://github.com/benchmark-driver/misc/blob/52f05781f65467baf895bf6ba79d172c9b0826fd/concurrent-map/bench.rb (the float shown is the duration in seconds; shorter is better) * Before ``` $ INHERIT=0 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] -- 1.6507579649914987 $ INHERIT=0 ruby -v --jit bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] -- 1.5091587850474752 $ INHERIT=1 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] -- 1.6124781150138006 $ INHERIT=1 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] -- 1.7495657080435194 # <-- this ``` * After ``` $ INHERIT=0 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.653559010999743 $ INHERIT=0 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.4738391840364784 $ INHERIT=1 ruby -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.645227018976584 $ INHERIT=1 ruby --jit -v bench.rb ruby 2.7.0dev (2019-04-13 trunk 67523) +JIT [x86_64-linux] last_commit=Recompile JIT-ed code without optimization -- 1.523708809982054 # <-- this ``` Modified files: trunk/mjit.c trunk/mjit.h trunk/mjit_compile.c trunk/mjit_worker.c trunk/test/ruby/test_jit.rb trunk/tool/ruby_vm/views/_mjit_compile_ivar.erb trunk/tool/ruby_vm/views/_mjit_compile_send.erb Index: mjit_worker.c 
=================================================================== --- mjit_worker.c (revision 67529) +++ mjit_worker.c (revision 67530) @@ -144,6 +144,8 @@ struct rb_mjit_unit { https://github.com/ruby/ruby/blob/trunk/mjit_worker.c#L144 /* Only used by unload_units. Flag to check this unit is currently on stack or not. */ char used_code_p; struct list_node unode; + // mjit_compile's optimization switches + struct rb_mjit_compile_info compile_info; }; /* Linked list of struct rb_mjit_unit. */ @@ -184,6 +186,8 @@ static struct rb_mjit_unit_list unit_que https://github.com/ruby/ruby/blob/trunk/mjit_worker.c#L186 static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) }; /* List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`. */ static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) }; +// List of units before recompilation and just waiting for dlclose(). +static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) }; /* The number of so far processed ISEQs, used to generate unique id. */ static int current_unit_num; /* A mutex for conitionals and critical sections. */ Index: mjit.c =================================================================== --- mjit.c (revision 67529) +++ mjit.c (revision 67530) @@ -299,16 +299,16 @@ unload_units(void) https://github.com/ruby/ruby/blob/trunk/mjit.c#L299 verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length); } -/* Add ISEQ to be JITed in parallel with the current thread. - Unload some JIT codes if there are too many of them. 
*/ -void -mjit_add_iseq_to_process(const rb_iseq_t *iseq) +static void +mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info) { if (!mjit_enabled || pch_status == PCH_FAILED) return; iseq->body->jit_func = (mjit_func_t)NOT_READY_JIT_ISEQ_FUNC; create_unit(iseq); + if (compile_info != NULL) + iseq->body->jit_unit->compile_info = *compile_info; if (iseq->body->jit_unit == NULL) /* Failure in creating the unit. */ return; @@ -323,13 +323,19 @@ mjit_add_iseq_to_process(const rb_iseq_t https://github.com/ruby/ruby/blob/trunk/mjit.c#L323 CRITICAL_SECTION_FINISH(3, "in add_iseq_to_process"); } +/* Add ISEQ to be JITed in parallel with the current thread. + Unload some JIT codes if there are too many of them. */ +void +rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq) +{ + mjit_add_iseq_to_process(iseq, NULL); +} + /* For this timeout seconds, --jit-wait will wait for JIT compilation finish. */ #define MJIT_WAIT_TIMEOUT_SECONDS 60 -/* Wait for JIT compilation finish for --jit-wait, and call the function pointer - if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. */ -VALUE -mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body) +static void +mjit_wait(struct rb_iseq_constant_body *body) { struct timeval tv; int tries = 0; @@ -350,13 +356,48 @@ mjit_wait_call(rb_execution_context_t *e https://github.com/ruby/ruby/blob/trunk/mjit.c#L356 CRITICAL_SECTION_FINISH(3, "in mjit_wait_call for a client wakeup"); rb_thread_wait_for(tv); } +} +/* Wait for JIT compilation finish for --jit-wait, and call the function pointer + if the compiled result is not NOT_COMPILED_JIT_ISEQ_FUNC. 
*/ +VALUE +mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body) +{ + mjit_wait(body); if ((uintptr_t)body->jit_func <= (uintptr_t)LAST_JIT_ISEQ_FUNC) { return Qundef; } return body->jit_func(ec, ec->cfp); } +struct rb_mjit_compile_info* +rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body) +{ + assert(body->jit_unit != NULL); + return &body->jit_unit->compile_info; +} + +void +rb_mjit_recompile_iseq(const rb_iseq_t *iseq) +{ + if ((ptrdiff_t)iseq->body->jit_func <= (ptrdiff_t)LAST_JIT_ISEQ_FUNC) + return; + + verbose(1, "JIT recompile: %s@%s:%d", RSTRING_PTR(iseq->body->location.label), + RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno)); + + CRITICAL_SECTION_START(3, "in rb_mjit_recompile_iseq"); + remove_from_list(iseq->body->jit_unit, &active_units); + iseq->body->jit_func = (void *)NOT_ADDED_JIT_ISEQ_FUNC; + add_to_list(iseq->body->jit_unit, &stale_units); + CRITICAL_SECTION_FINISH(3, "in rb_mjit_recompile_iseq"); + + mjit_add_iseq_to_process(iseq, &iseq->body->jit_unit->compile_info); + if (UNLIKELY(mjit_opts.wait)) { + mjit_wait(iseq->body); + } +} + extern VALUE ruby_archlibdir_path, ruby_prefix_path; // Initialize header_file, pch_file, libruby_pathflag. Return true on success. 
@@ -818,6 +859,7 @@ mjit_finish(bool close_handle_p) https://github.com/ruby/ruby/blob/trunk/mjit.c#L859 free_list(&unit_queue, close_handle_p); free_list(&active_units, close_handle_p); free_list(&compact_units, close_handle_p); + free_list(&stale_units, close_handle_p); finish_conts(); mjit_enabled = false; Index: mjit.h =================================================================== --- mjit.h (revision 67529) +++ mjit.h (revision 67530) @@ -55,14 +55,24 @@ struct mjit_options { https://github.com/ruby/ruby/blob/trunk/mjit.h#L55 int max_cache_size; }; +// State of optimization switches +struct rb_mjit_compile_info { + // Disable getinstancevariable/setinstancevariable optimizations based on inline cache + bool disable_ivar_cache; + // Disable send/opt_send_without_block optimizations based on inline cache + bool disable_send_cache; +}; + typedef VALUE (*mjit_func_t)(rb_execution_context_t *, rb_control_frame_t *); RUBY_SYMBOL_EXPORT_BEGIN RUBY_EXTERN struct mjit_options mjit_opts; RUBY_EXTERN bool mjit_call_p; -extern void mjit_add_iseq_to_process(const rb_iseq_t *iseq); +extern void rb_mjit_add_iseq_to_process(const rb_iseq_t *iseq); extern VALUE mjit_wait_call(rb_execution_context_t *ec, struct rb_iseq_constant_body *body); +extern struct rb_mjit_compile_info* rb_mjit_iseq_compile_info(const struct rb_iseq_constant_body *body); +extern void rb_mjit_recompile_iseq(const rb_iseq_t *iseq); RUBY_SYMBOL_EXPORT_END extern bool mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname); @@ -120,7 +130,7 @@ mjit_exec(rb_execution_context_t *ec) https://github.com/ruby/ruby/blob/trunk/mjit.h#L130 RB_DEBUG_COUNTER_INC(mjit_exec_not_added); if (total_calls == mjit_opts.min_calls && mjit_target_iseq_p(body)) { RB_DEBUG_COUNTER_INC(mjit_exec_not_added_add_iseq); - mjit_add_iseq_to_process(iseq); + rb_mjit_add_iseq_to_process(iseq); if (UNLIKELY(mjit_opts.wait)) { return mjit_wait_call(ec, body); } Index: mjit_compile.c 
=================================================================== --- mjit_compile.c (revision 67529) +++ mjit_compile.c (revision 67530) @@ -37,6 +37,8 @@ struct compile_status { https://github.com/ruby/ruby/blob/trunk/mjit_compile.c#L37 // Safely-accessible cache entries copied from main thread. union iseq_inline_storage_entry *is_entries; struct rb_call_cache *cc_entries; + // Mutated optimization levels + struct rb_mjit_compile_info *compile_info; }; /* Storage to keep data which is consistent in each conditional branch. @@ -213,6 +215,7 @@ mjit_compile(FILE *f, const rb_iseq_t *i https://github.com/ruby/ruby/blob/trunk/mjit_compile.c#L215 alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL, .is_entries = (body->is_size > 0) ? alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL, + .compile_info = rb_mjit_iseq_compile_info(body), }; memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); if ((status.cc_entries != NULL || status.is_entries != NULL) @@ -235,6 +238,7 @@ mjit_compile(FILE *f, const rb_iseq_t *i https://github.com/ruby/ruby/blob/trunk/mjit_compile.c#L238 else { fprintf(f, " VALUE *stack = reg_cfp->sp;\n"); } + fprintf(f, " static const rb_iseq_t *original_iseq = 0x%"PRIxVALUE";\n", (VALUE)iseq); fprintf(f, " static const VALUE *const original_body_iseq = (VALUE *)0x%"PRIxVALUE";\n", (VALUE)body->iseq_encoded); Index: tool/ruby_vm/views/_mjit_compile_ivar.erb =================================================================== --- tool/ruby_vm/views/_mjit_compile_ivar.erb (revision 67529) +++ tool/ruby_vm/views/_mjit_compile_ivar.erb (revision 67530) @@ -17,7 +17,7 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_ivar.erb#L17 IC ic_copy = &(status->is_entries + ((union iseq_inline_storage_entry *)ic - body->is_entries))->cache; % % # compiler: Consider cfp->self as T_OBJECT if ic_copy->ic_serial is set - if (ic_copy->ic_serial) { + if 
(!status->compile_info->disable_ivar_cache && ic_copy->ic_serial) { % # JIT: optimize away motion of sp and pc. This path does not call rb_warning() and so it's always leaf and not `handles_sp`. % # <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%> % @@ -43,6 +43,8 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_ivar.erb#L43 fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_ivar);\n"); + fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_ivar_cache = true;\n"); + fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n"); fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); Index: tool/ruby_vm/views/_mjit_compile_send.erb =================================================================== --- tool/ruby_vm/views/_mjit_compile_send.erb (revision 67529) +++ tool/ruby_vm/views/_mjit_compile_send.erb (revision 67530) @@ -16,7 +16,7 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L16 % # compiler: Use copied cc to avoid race condition CALL_CACHE cc_copy = status->cc_entries + (cc - body->cc_entries); % - if (has_valid_method_type(cc_copy)) { + if (!status->compile_info->disable_send_cache && has_valid_method_type(cc_copy)) { const rb_iseq_t *iseq; unsigned int argc = ci->orig_argc; // this `argc` variable is for calculating a value's position on stack considering `blockarg`. 
% if insn.name == 'send' @@ -39,6 +39,8 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L39 fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n"); + fprintf(f, " rb_mjit_iseq_compile_info(original_iseq->body)->disable_send_cache = true;\n"); + fprintf(f, " rb_mjit_recompile_iseq(original_iseq);\n"); fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); Index: test/ruby/test_jit.rb =================================================================== --- test/ruby/test_jit.rb (revision 67529) +++ test/ruby/test_jit.rb (revision 67530) @@ -10,6 +10,7 @@ class TestJIT < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_jit.rb#L10 include JITSupport IGNORABLE_PATTERNS = [ + /\AJIT recompile: .+\n\z/, /\ASuccessful MJIT finish\n\z/, ] @@ -529,7 +530,7 @@ class TestJIT < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_jit.rb#L530 end; # send call -> optimized call (send JIT) -> optimized call - assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 1, min_calls: 2) + assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: '122', success_count: 2, min_calls: 2) begin; obj = Object.new def obj.[](h) @@ -704,7 +705,7 @@ class TestJIT < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_jit.rb#L705 end def test_inlined_undefined_ivar - assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 2, min_calls: 3) + assert_eval_with_jit("#{<<~"begin;"}\n#{<<~"end;"}", stdout: "bbb", success_count: 3, min_calls: 3) begin; class Foo def initialize -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/