[前][次][番号順一覧][スレッド一覧]

ruby-changes:66898

From: Samuel <ko1@a...>
Date: Tue, 27 Jul 2021 15:23:51 +0900 (JST)
Subject: [ruby-changes:66898] 13f8521c63 (master): Fix potential hang when joining threads.

https://git.ruby-lang.org/ruby.git/commit/?id=13f8521c63

From 13f8521c630a15c87398dee0763e95f59c032a94 Mon Sep 17 00:00:00 2001
From: Samuel Williams <samuel.williams@o...>
Date: Mon, 19 Jul 2021 19:21:46 +1200
Subject: Fix potential hang when joining threads.

If the thread termination invokes user code after `th->status` becomes
`THREAD_KILLED`, and the user unblock function causes that `th->status` to
become something else (e.g. `THREAD_RUNNING`), threads waiting in
`thread_join_sleep` will hang forever. We move the unblock function call
to before the thread status is updated, and allow threads to join as soon
as `th->value` becomes defined.
---
 test/fiber/scheduler.rb   | 16 ++++++++++++++--
 test/fiber/test_thread.rb | 14 ++++++++++++++
 thread.c                  | 21 +++++++++++++++++----
 vm.c                      | 17 ++++++++++-------
 4 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/test/fiber/scheduler.rb b/test/fiber/scheduler.rb
index af64e4e..2785561 100644
--- a/test/fiber/scheduler.rb
+++ b/test/fiber/scheduler.rb
@@ -112,8 +112,10 @@ class Scheduler https://github.com/ruby/ruby/blob/trunk/test/fiber/scheduler.rb#L112
 
     self.run
   ensure
-    @urgent.each(&:close)
-    @urgent = nil
+    if @urgent
+      @urgent.each(&:close)
+      @urgent = nil
+    end
 
     @closed = true
 
@@ -240,3 +242,13 @@ class BrokenUnblockScheduler < Scheduler https://github.com/ruby/ruby/blob/trunk/test/fiber/scheduler.rb#L242
     raise "Broken unblock!"
   end
 end
+
+class SleepingUnblockScheduler < Scheduler
+  # This method is invoked when the thread is exiting.
+  def unblock(blocker, fiber)
+    super
+
+    # This changes the current thread state to `THREAD_RUNNING` which causes `thread_join_sleep` to hang.
+    sleep(0.1)
+  end
+end
diff --git a/test/fiber/test_thread.rb b/test/fiber/test_thread.rb
index 843604b..acc8300 100644
--- a/test/fiber/test_thread.rb
+++ b/test/fiber/test_thread.rb
@@ -66,4 +66,18 @@ class TestFiberThread < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/fiber/test_thread.rb#L66
       thread.join
     end
   end
+
+  def test_thread_join_hang
+    thread = Thread.new do
+      scheduler = SleepingUnblockScheduler.new
+
+      Fiber.set_scheduler scheduler
+
+      Fiber.schedule do
+        Thread.new{sleep(0.01)}.value
+      end
+    end
+
+    thread.join
+  end
 end
diff --git a/thread.c b/thread.c
index 0f6838e..ec67bba 100644
--- a/thread.c
+++ b/thread.c
@@ -632,6 +632,7 @@ thread_cleanup_func_before_exec(void *th_ptr) https://github.com/ruby/ruby/blob/trunk/thread.c#L632
 {
     rb_thread_t *th = th_ptr;
     th->status = THREAD_KILLED;
+
     // The thread stack doesn't exist in the forked process:
     th->ec->machine.stack_start = th->ec->machine.stack_end = NULL;
 
@@ -817,6 +818,9 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L818
 
     thread_debug("thread start (get lock): %p\n", (void *)th);
 
+    // Ensure that we are not joinable.
+    VM_ASSERT(th->value == Qundef);
+
     EC_PUSH_TAG(th->ec);
 
     if ((state = EC_EXEC_TAG()) == TAG_NONE) {
@@ -857,6 +861,12 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L861
         th->value = Qnil;
     }
 
+    // The thread is effectively finished and can be joined.
+    VM_ASSERT(th->value != Qundef);
+
+    rb_threadptr_join_list_wakeup(th);
+    rb_threadptr_unlock_all_locking_mutexes(th);
+
     if (th->invoke_type == thread_invoke_type_ractor_proc) {
         rb_thread_terminate_all(th);
         rb_ractor_teardown(th->ec);
@@ -874,9 +884,6 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L884
         rb_threadptr_raise(ractor_main_th, 1, &errinfo);
     }
 
-    rb_threadptr_join_list_wakeup(th);
-    rb_threadptr_unlock_all_locking_mutexes(th);
-
     EC_POP_TAG();
 
     rb_ec_clear_current_thread_trace_func(th->ec);
@@ -1153,6 +1160,12 @@ remove_from_join_list(VALUE arg) https://github.com/ruby/ruby/blob/trunk/thread.c#L1160
 
 static rb_hrtime_t *double2hrtime(rb_hrtime_t *, double);
 
+static int
+thread_finished(rb_thread_t *th)
+{
+    return th->status == THREAD_KILLED || th->value != Qundef;
+}
+
 static VALUE
 thread_join_sleep(VALUE arg)
 {
@@ -1179,7 +1192,7 @@ thread_join_sleep(VALUE arg) https://github.com/ruby/ruby/blob/trunk/thread.c#L1192
         end = rb_hrtime_add(*limit, rb_hrtime_now());
     }
 
-    while (target_th->status != THREAD_KILLED) {
+    while (!thread_finished(target_th)) {
         VALUE scheduler = rb_fiber_scheduler_current();
 
         if (scheduler != Qnil) {
diff --git a/vm.c b/vm.c
index 78aadb6..75cd371 100644
--- a/vm.c
+++ b/vm.c
@@ -3081,6 +3081,8 @@ th_init(rb_thread_t *th, VALUE self) https://github.com/ruby/ruby/blob/trunk/vm.c#L3081
     th->thread_id_string[0] = '\0';
 #endif
 
+    th->value = Qundef;
+
 #if OPT_CALL_THREADED_CODE
     th->retval = Qundef;
 #endif
@@ -3093,16 +3095,17 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/vm.c#L3095
 ruby_thread_init(VALUE self)
 {
     rb_thread_t *th = GET_THREAD();
-    rb_thread_t *targe_th = rb_thread_ptr(self);
+    rb_thread_t *target_th = rb_thread_ptr(self);
     rb_vm_t *vm = th->vm;
 
-    targe_th->vm = vm;
-    th_init(targe_th, self);
+    target_th->vm = vm;
+    th_init(target_th, self);
+
+    target_th->top_wrapper = 0;
+    target_th->top_self = rb_vm_top_self();
+    target_th->ec->root_svar = Qfalse;
+    target_th->ractor = th->ractor;
 
-    targe_th->top_wrapper = 0;
-    targe_th->top_self = rb_vm_top_self();
-    targe_th->ec->root_svar = Qfalse;
-    targe_th->ractor = th->ractor;
     return self;
 }
 
-- 
cgit v1.1


--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]