ruby-changes:68032
From: Jeremy <ko1@a...>
Date: Sun, 19 Sep 2021 01:05:39 +0900 (JST)
Subject: [ruby-changes:68032] ddb85c5d2b (master): Do not load file with same realpath twice when requiring
https://git.ruby-lang.org/ruby.git/commit/?id=ddb85c5d2b From ddb85c5d2bdf75a83eb163856508691a7436b446 Mon Sep 17 00:00:00 2001 From: Jeremy Evans <code@j...> Date: Wed, 30 Jun 2021 13:50:19 -0700 Subject: Do not load file with same realpath twice when requiring This fixes issues with paths being loaded twice in certain cases when symlinks are used. It took me multiple attempts to get this working. My original attempt tried to convert paths to realpaths before adding them to $LOADED_FEATURES. Unfortunately, this doesn't work well with the loaded feature index, which is based off load paths and not realpaths. While I was able to get require working, I'm fairly sure the loaded feature index was not being used as expected, which would have significant performance implications. Additionally, I was never able to get that approach working with autoload when autoloading a non-realpath file. It also broke some specs. This takes a more conservative approach. Directly before loading the file, if the file with the same realpath has been required, the loading of the file is skipped. The realpaths are stored as fstrings in a hidden hash. When rebuilding the loaded feature index, the hash of realpaths is also rebuilt. I'm guessing this makes rebuilding process slower, but I don think that is a hot path. In general, modifying loaded features is only done when reloading, and that tends to be in non-production environments. Change test_require_with_loaded_features_pop test to use 30 threads and 300 iterations, instead of 4 threads and 1000 iterations. I saw only sporadic failures with 4/1000, but consistent failures 30/300 threads. These failures were due to the fact that the concurrent deletions from $LOADED_FEATURES in other threads can result in rb_ary_entry returning nil when rebuilding the loaded features index. To avoid concurrency issues when rebuilding the loaded features index, the building of the index itself is left alone, and afterwards, a separate loop is done on a copy of the loaded feature snapshot in order to rebuild the realpaths hash. Fixes [Bug #17885] --- load.c | 33 ++++++++++++++++++++++++++++++++- test/ruby/test_require.rb | 30 ++++++++++++++++++++++++++++-- vm.c | 2 ++ vm_core.h | 1 + 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/load.c b/load.c index 28c17e8..4fb1301 100644 --- a/load.c +++ b/load.c @@ -153,6 +153,12 @@ get_loaded_features(void) https://github.com/ruby/ruby/blob/trunk/load.c#L153 } static VALUE +get_loaded_features_realpaths(void) +{ + return GET_VM()->loaded_features_realpaths; +} + +static VALUE get_LOADED_FEATURES(ID _x, VALUE *_y) { return get_loaded_features(); @@ -317,6 +323,9 @@ get_loaded_features_index(void) https://github.com/ruby/ruby/blob/trunk/load.c#L323 /* The sharing was broken; something (other than us in rb_provide_feature()) modified loaded_features. Rebuild the index. */ st_foreach(vm->loaded_features_index, loaded_features_index_clear_i, 0); + + VALUE realpaths = vm->loaded_features_realpaths; + rb_hash_clear(realpaths); features = vm->loaded_features; for (i = 0; i < RARRAY_LEN(features); i++) { VALUE entry, as_str; @@ -328,6 +337,15 @@ get_loaded_features_index(void) https://github.com/ruby/ruby/blob/trunk/load.c#L337 features_index_add(as_str, INT2FIX(i)); } reset_loaded_features_snapshot(); + + features = rb_ary_dup(vm->loaded_features_snapshot); + long j = RARRAY_LEN(features); + for (i = 0; i < j; i++) { + VALUE as_str = rb_ary_entry(features, i); + VALUE realpath = rb_check_realpath(Qnil, as_str, NULL); + if (NIL_P(realpath)) realpath = as_str; + rb_hash_aset(realpaths, rb_fstring(realpath), Qtrue); + } } return vm->loaded_features_index; } @@ -1063,6 +1081,8 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception) https://github.com/ruby/ruby/blob/trunk/load.c#L1081 char *volatile ftptr = 0; VALUE path; volatile VALUE saved_path; + VALUE realpath = 0; + VALUE realpaths = get_loaded_features_realpaths(); volatile bool reset_ext_config = false; struct rb_ext_config prev_ext_config; @@ -1090,6 +1110,10 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception) https://github.com/ruby/ruby/blob/trunk/load.c#L1110 else if (!*ftptr) { result = TAG_RETURN; } + else if (RTEST(rb_hash_aref(realpaths, + realpath = rb_realpath_internal(Qnil, path, 1)))) { + result = 0; + } else { switch (found) { case 'r': @@ -1141,7 +1165,12 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception) https://github.com/ruby/ruby/blob/trunk/load.c#L1165 rb_exc_raise(ec->errinfo); } - if (result == TAG_RETURN) rb_provide_feature(path); + if (result == TAG_RETURN) { + rb_provide_feature(path); + if (realpath) { + rb_hash_aset(realpaths, rb_fstring(realpath), Qtrue); + } + } ec->errinfo = saved.errinfo; RUBY_DTRACE_HOOK(REQUIRE_RETURN, RSTRING_PTR(fname)); @@ -1341,6 +1370,8 @@ Init_load(void) https://github.com/ruby/ruby/blob/trunk/load.c#L1370 vm->loaded_features = rb_ary_new(); vm->loaded_features_snapshot = rb_ary_tmp_new(0); vm->loaded_features_index = st_init_numtable(); + vm->loaded_features_realpaths = rb_hash_new(); + rb_obj_hide(vm->loaded_features_realpaths); rb_define_global_function("load", rb_f_load, -1); rb_define_global_function("require", rb_f_require, 1); diff --git a/test/ruby/test_require.rb b/test/ruby/test_require.rb index 401b274..e996c6d 100644 --- a/test/ruby/test_require.rb +++ b/test/ruby/test_require.rb @@ -434,6 +434,32 @@ class TestRequire < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_require.rb#L434 } end + def test_relative_symlink_realpath + Dir.mktmpdir {|tmp| + Dir.chdir(tmp) { + Dir.mkdir "a" + File.open("a/a.rb", "w") {|f| f.puts 'require_relative "b"' } + File.open("a/b.rb", "w") {|f| f.puts '$t += 1' } + Dir.mkdir "b" + File.binwrite("c.rb", <<~RUBY) + $t = 0 + $:.unshift(File.expand_path('../b', __FILE__)) + require "b" + require "a" + print $t + RUBY + begin + File.symlink("../a/a.rb", "b/a.rb") + File.symlink("../a/b.rb", "b/b.rb") + result = IO.popen([EnvUtil.rubybin, "c.rb"], &:read) + assert_equal("1", result, "bug17885 [ruby-core:104010]") + rescue NotImplementedError, Errno::EACCES + skip "File.symlink is not implemented" + end + } + } + end + def test_frozen_loaded_features bug3756 = '[ruby-core:31913]' assert_in_out_err(['-e', '$LOADED_FEATURES.freeze; require "ostruct"'], "", @@ -711,8 +737,8 @@ class TestRequire < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/ruby/test_require.rb#L737 assert_in_out_err([{"RUBYOPT" => nil}, "-", script.path], "#{<<~"begin;"}\n#{<<~"end;"}", %w(:ok), [], bug7530, timeout: 60) begin; PATH = ARGV.shift - THREADS = 4 - ITERATIONS_PER_THREAD = 1000 + THREADS = 30 + ITERATIONS_PER_THREAD = 300 THREADS.times.map { Thread.new do diff --git a/vm.c b/vm.c index 1a21638..f6b0f94 100644 --- a/vm.c +++ b/vm.c @@ -2486,6 +2486,7 @@ rb_vm_update_references(void *ptr) https://github.com/ruby/ruby/blob/trunk/vm.c#L2486 vm->expanded_load_path = rb_gc_location(vm->expanded_load_path); vm->loaded_features = rb_gc_location(vm->loaded_features); vm->loaded_features_snapshot = rb_gc_location(vm->loaded_features_snapshot); + vm->loaded_features_realpaths = rb_gc_location(vm->loaded_features_realpaths); vm->top_self = rb_gc_location(vm->top_self); vm->orig_progname = rb_gc_location(vm->orig_progname); @@ -2573,6 +2574,7 @@ rb_vm_mark(void *ptr) https://github.com/ruby/ruby/blob/trunk/vm.c#L2574 rb_gc_mark_movable(vm->expanded_load_path); rb_gc_mark_movable(vm->loaded_features); rb_gc_mark_movable(vm->loaded_features_snapshot); + rb_gc_mark_movable(vm->loaded_features_realpaths); rb_gc_mark_movable(vm->top_self); rb_gc_mark_movable(vm->orig_progname); RUBY_MARK_MOVABLE_UNLESS_NULL(vm->coverages); diff --git a/vm_core.h b/vm_core.h index 58243b1..6b627f4 100644 --- a/vm_core.h +++ b/vm_core.h @@ -631,6 +631,7 @@ typedef struct rb_vm_struct { https://github.com/ruby/ruby/blob/trunk/vm_core.h#L631 VALUE expanded_load_path; VALUE loaded_features; VALUE loaded_features_snapshot; + VALUE loaded_features_realpaths; struct st_table *loaded_features_index; struct st_table *loading_table; -- cgit v1.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/