[前][次][番号順一覧][スレッド一覧]

ruby-changes:60766

From: Takashi <ko1@a...>
Date: Tue, 14 Apr 2020 08:59:40 +0900 (JST)
Subject: [ruby-changes:60766] b9d3ceee8f (master): Unwrap vm_call_cfunc indirection on JIT

https://git.ruby-lang.org/ruby.git/commit/?id=b9d3ceee8f

From b9d3ceee8f8e4fade1eddc0c18abee59f9c1eee7 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@g...>
Date: Mon, 13 Apr 2020 12:17:45 -0700
Subject: Unwrap vm_call_cfunc indirection on JIT

for VM_METHOD_TYPE_CFUNC.

This has been known to decrease optcarrot fps:

```
$ benchmark-driver -v --rbenv 'before --jit;after --jit' benchmark.yml --repeat-count=24 --output=all
before --jit: ruby 2.8.0dev (2020-04-13T16:25:13Z master fb40495cd9) +JIT [x86_64-linux]
after --jit: ruby 2.8.0dev (2020-04-13T23:23:11Z mjit-inline-c bdcd06d159) +JIT [x86_64-linux]
Calculating -------------------------------------
                                 before --jit           after --jit
Optcarrot Lan_Master.nes    66.38132676191719     67.41369177299630 fps
                            69.42728743772243     68.90327567263054
                            72.16028300263211     69.62605130880686
                            72.46631319102777     70.48818243767207
                            73.37078877002490     70.79522887347566
                            73.69422431217367     70.99021920193194
                            74.01471487018695     74.69931965402584
                            75.48685183295630     74.86714575949016
                            75.54445264507932     75.97864419721677
                            77.28089738169756     76.48908637569581
                            78.04183397891302     76.54320932488021
                            78.36807984096562     76.59407262898067
                            78.92898762543574     77.31316743361343
                            78.93576483233765     77.97153484180480
                            79.13754917503078     77.98478782102325
                            79.62648945850653     78.02263322726446
                            79.86334213878064     78.26333724045934
                            80.05100635898518     78.60056756355614
                            80.26186843769584     78.91082645644468
                            80.34205717020330     79.01226659142263
                            80.62286066044338     79.32733939423721
                            80.95883033058557     79.63793060542024
                            80.97376819251613     79.73108936622778
                            81.23050939202896     80.18280109433088
```

and I deleted this capability in an early stage of YARV-MJIT development:
https://github.com/k0kubun/yarv-mjit/commit/0ab130feeefc2b9078a1077e4fec93b3f5e45d07

I suspect either of the following things could be the cause:

* Directly calling vm_call_cfunc requires more optimization effort in GCC,
  resulting in 30ms-ish compilation time increase for such methods and
  decreasing the number of methods compiled in a benchmarked period.

* Code size increase => more icache misses

These hypotheses could be verified by some methodologies. However, I'd
like to introduce this change regardless of the result because not having
it blocks inlining of C method definitions.

I may revert this commit if I give up on implementing inlining of C method
definitions, which requires this change.

Microbenchmark-wise, this gives a slight performance improvement:

```
$ benchmark-driver -v --rbenv 'before --jit;after --jit' benchmark/mjit_send_cfunc.yml --repeat-count=4
before --jit: ruby 2.8.0dev (2020-04-13T16:25:13Z master fb40495cd9) +JIT [x86_64-linux]
after --jit: ruby 2.8.0dev (2020-04-13T23:23:11Z mjit-inline-c bdcd06d159) +JIT [x86_64-linux]
Calculating -------------------------------------
                     before --jit  after --jit
     mjit_send_cfunc      41.961M      56.489M i/s -    100.000M times in 2.383143s 1.770244s

Comparison:
                  mjit_send_cfunc
         after --jit:  56489372.5 i/s
        before --jit:  41961388.1 i/s - 1.35x  slower
```

diff --git a/benchmark/mjit_send_cfunc.yml b/benchmark/mjit_send_cfunc.yml
new file mode 100644
index 0000000..b5f9c89
--- /dev/null
+++ b/benchmark/mjit_send_cfunc.yml
@@ -0,0 +1,6 @@ https://github.com/ruby/ruby/blob/trunk/benchmark/mjit_send_cfunc.yml#L1
+prelude: |
+  def mjit_send_cfunc
+    self.class
+  end
+benchmark: mjit_send_cfunc
+loop_count: 100000000
diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb
index 0e033a4..255648b 100644
--- a/tool/ruby_vm/views/_mjit_compile_send.erb
+++ b/tool/ruby_vm/views/_mjit_compile_send.erb
@@ -15,15 +15,18 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L15
 % # compiler: Use captured cc to avoid race condition
     const struct rb_callcache *captured_cc = captured_cc_entries(status)[call_data_index(cd, body)];
 %
-    const rb_iseq_t *iseq;
+% # compiler: Inline send insn where some supported fastpath is used.
+    const rb_iseq_t *iseq = NULL;
     const CALL_INFO ci = cd->ci;
-    if (!status->compile_info->disable_send_cache && has_valid_method_type(captured_cc)
-        // CC_SET_FASTPATH in vm_callee_setup_arg
-        && !(vm_ci_flag(ci) & VM_CALL_TAILCALL) // inlining non-tailcall path
-        && vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_ISEQ
-        && fastpath_applied_iseq_p(ci, captured_cc, iseq = def_iseq_ptr(vm_cc_cme(captured_cc)->def))) {
-
-        int param_size = iseq->body->param.size;
+    if (!status->compile_info->disable_send_cache && has_valid_method_type(captured_cc) && (
+%       # `CC_SET_FASTPATH(cc, vm_call_cfunc, TRUE)` in `vm_call_method_each_type`
+        vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC
+%       # `CC_SET_FASTPATH(cc, vm_call_iseq_setup_func(...), vm_call_iseq_optimizable_p(...))` in `vm_callee_setup_arg`,
+%       # and support only non-VM_CALL_TAILCALL path inside it
+        || (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_ISEQ
+            && fastpath_applied_iseq_p(ci, captured_cc, iseq = def_iseq_ptr(vm_cc_cme(captured_cc)->def))
+            && !(vm_ci_flag(ci) & VM_CALL_TAILCALL))
+    )) {
         int sp_inc = (int)sp_inc_of_sendish(ci);
         fprintf(f, "{\n");
 
@@ -40,7 +43,7 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L43
 <%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>
 
 % # JIT: If ISeq is inlinable, call the inlined method without pushing a frame.
-        if (status->inlined_iseqs != NULL && status->inlined_iseqs[pos] == iseq->body) {
+        if (iseq && status->inlined_iseqs != NULL && iseq->body == status->inlined_iseqs[pos]) {
             fprintf(f, "    {\n");
             fprintf(f, "        VALUE orig_self = reg_cfp->self;\n");
             fprintf(f, "        reg_cfp->self = stack[%d];\n", b->stack_size + sp_inc - 1);
@@ -49,7 +52,7 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L52
             fprintf(f, "    }\n");
         }
         else {
-% # JIT: Forked `vm_sendish` to inline various things
+% # JIT: Forked `vm_sendish` (except method_explorer = vm_search_method_wrap) to inline various things
             fprintf(f, "    {\n");
             fprintf(f, "        VALUE val;\n");
             fprintf(f, "        struct rb_calling_info calling;\n");
@@ -58,20 +61,28 @@ https://github.com/ruby/ruby/blob/trunk/tool/ruby_vm/views/_mjit_compile_send.erb#L61
 % else
             fprintf(f, "        calling.block_handler = VM_BLOCK_HANDLER_NONE;\n");
 % end
-            fprintf(f, "        calling.argc = %d;\n", vm_ci_argc(ci));
+            fprintf(f, "        calling.kw_splat = %d;\n", IS_ARGS_KW_SPLAT(ci) > 0);
             fprintf(f, "        calling.recv = stack[%d];\n", b->stack_size + sp_inc - 1);
+            fprintf(f, "        calling.argc = %d;\n", vm_ci_argc(ci));
 
-%           # fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
-            fprintf(f, "        vm_call_iseq_setup_normal(ec, reg_cfp, &calling, cc_cme, 0, %d, %d);\n", param_size, iseq->body->local_table_size);
-            if (iseq->body->catch_except_p) {
-                fprintf(f, "        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
-                fprintf(f, "        val = vm_exec(ec, TRUE);\n");
+            if (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC) {
+%               # TODO: optimize this more
+                fprintf(f, "        CALL_DATA cd = (CALL_DATA)0x%"PRIxVALUE";\n", operands[0]);
+                fprintf(f, "        val = vm_call_cfunc(ec, reg_cfp, &calling, cd);\n");
             }
-            else {
-                fprintf(f, "        if ((val = mjit_exec(ec)) == Qundef) {\n");
-                fprintf(f, "            VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup
-                fprintf(f, "            val = vm_exec(ec, FALSE);\n");
-                fprintf(f, "        }\n");
+            else { // VM_METHOD_TYPE_ISEQ
+%               # fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
+                fprintf(f, "        vm_call_iseq_setup_normal(ec, reg_cfp, &calling, cc_cme, 0, %d, %d);\n", iseq->body->param.size, iseq->body->local_table_size);
+                if (iseq->body->catch_except_p) {
+                    fprintf(f, "        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
+                    fprintf(f, "        val = vm_exec(ec, TRUE);\n");
+                }
+                else {
+                    fprintf(f, "        if ((val = mjit_exec(ec)) == Qundef) {\n");
+                    fprintf(f, "            VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup
+                    fprintf(f, "            val = vm_exec(ec, FALSE);\n");
+                    fprintf(f, "        }\n");
+                }
             }
             fprintf(f, "        stack[%d] = val;\n", b->stack_size + sp_inc - 1);
             fprintf(f, "    }\n");
-- 
cgit v0.10.2


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]