ruby-changes:66898
From: Samuel <ko1@a...>
Date: Tue, 27 Jul 2021 15:23:51 +0900 (JST)
Subject: [ruby-changes:66898] 13f8521c63 (master): Fix potential hang when joining threads.
https://git.ruby-lang.org/ruby.git/commit/?id=13f8521c63 From 13f8521c630a15c87398dee0763e95f59c032a94 Mon Sep 17 00:00:00 2001 From: Samuel Williams <samuel.williams@o...> Date: Mon, 19 Jul 2021 19:21:46 +1200 Subject: Fix potential hang when joining threads. If the thread termination invokes user code after `th->status` becomes `THREAD_KILLED`, and the user unblock function causes that `th->status` to become something else (e.g. `THREAD_RUNNING`), threads waiting in `thread_join_sleep` will hang forever. We move the unblock function call to before the thread status is updated, and allow threads to join as soon as `th->value` becomes defined. --- test/fiber/scheduler.rb | 16 ++++++++++++++-- test/fiber/test_thread.rb | 14 ++++++++++++++ thread.c | 21 +++++++++++++++++---- vm.c | 17 ++++++++++------- 4 files changed, 55 insertions(+), 13 deletions(-) diff --git a/test/fiber/scheduler.rb b/test/fiber/scheduler.rb index af64e4e..2785561 100644 --- a/test/fiber/scheduler.rb +++ b/test/fiber/scheduler.rb @@ -112,8 +112,10 @@ class Scheduler https://github.com/ruby/ruby/blob/trunk/test/fiber/scheduler.rb#L112 self.run ensure - @urgent.each(&:close) - @urgent = nil + if @urgent + @urgent.each(&:close) + @urgent = nil + end @closed = true @@ -240,3 +242,13 @@ class BrokenUnblockScheduler < Scheduler https://github.com/ruby/ruby/blob/trunk/test/fiber/scheduler.rb#L242 raise "Broken unblock!" end end + +class SleepingUnblockScheduler < Scheduler + # This method is invoked when the thread is exiting. + def unblock(blocker, fiber) + super + + # This changes the current thread state to `THREAD_RUNNING` which causes `thread_join_sleep` to hang. + sleep(0.1) + end +end diff --git a/test/fiber/test_thread.rb b/test/fiber/test_thread.rb index 843604b..acc8300 100644 --- a/test/fiber/test_thread.rb +++ b/test/fiber/test_thread.rb @@ -66,4 +66,18 @@ class TestFiberThread < Test::Unit::TestCase https://github.com/ruby/ruby/blob/trunk/test/fiber/test_thread.rb#L66 thread.join end end + + def test_thread_join_hang + thread = Thread.new do + scheduler = SleepingUnblockScheduler.new + + Fiber.set_scheduler scheduler + + Fiber.schedule do + Thread.new{sleep(0.01)}.value + end + end + + thread.join + end end diff --git a/thread.c b/thread.c index 0f6838e..ec67bba 100644 --- a/thread.c +++ b/thread.c @@ -632,6 +632,7 @@ thread_cleanup_func_before_exec(void *th_ptr) https://github.com/ruby/ruby/blob/trunk/thread.c#L632 { rb_thread_t *th = th_ptr; th->status = THREAD_KILLED; + // The thread stack doesn't exist in the forked process: th->ec->machine.stack_start = th->ec->machine.stack_end = NULL; @@ -817,6 +818,9 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L818 thread_debug("thread start (get lock): %p\n", (void *)th); + // Ensure that we are not joinable. + VM_ASSERT(th->value == Qundef); + EC_PUSH_TAG(th->ec); if ((state = EC_EXEC_TAG()) == TAG_NONE) { @@ -857,6 +861,12 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L861 th->value = Qnil; } + // The thread is effectively finished and can be joined. + VM_ASSERT(th->value != Qundef); + + rb_threadptr_join_list_wakeup(th); + rb_threadptr_unlock_all_locking_mutexes(th); + if (th->invoke_type == thread_invoke_type_ractor_proc) { rb_thread_terminate_all(th); rb_ractor_teardown(th->ec); @@ -874,9 +884,6 @@ thread_start_func_2(rb_thread_t *th, VALUE *stack_start) https://github.com/ruby/ruby/blob/trunk/thread.c#L884 rb_threadptr_raise(ractor_main_th, 1, &errinfo); } - rb_threadptr_join_list_wakeup(th); - rb_threadptr_unlock_all_locking_mutexes(th); - EC_POP_TAG(); rb_ec_clear_current_thread_trace_func(th->ec); @@ -1153,6 +1160,12 @@ remove_from_join_list(VALUE arg) https://github.com/ruby/ruby/blob/trunk/thread.c#L1160 static rb_hrtime_t *double2hrtime(rb_hrtime_t *, double); +static int +thread_finished(rb_thread_t *th) +{ + return th->status == THREAD_KILLED || th->value != Qundef; +} + static VALUE thread_join_sleep(VALUE arg) { @@ -1179,7 +1192,7 @@ thread_join_sleep(VALUE arg) https://github.com/ruby/ruby/blob/trunk/thread.c#L1192 end = rb_hrtime_add(*limit, rb_hrtime_now()); } - while (target_th->status != THREAD_KILLED) { + while (!thread_finished(target_th)) { VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { diff --git a/vm.c b/vm.c index 78aadb6..75cd371 100644 --- a/vm.c +++ b/vm.c @@ -3081,6 +3081,8 @@ th_init(rb_thread_t *th, VALUE self) https://github.com/ruby/ruby/blob/trunk/vm.c#L3081 th->thread_id_string[0] = '\0'; #endif + th->value = Qundef; + #if OPT_CALL_THREADED_CODE th->retval = Qundef; #endif @@ -3093,16 +3095,17 @@ static VALUE https://github.com/ruby/ruby/blob/trunk/vm.c#L3095 ruby_thread_init(VALUE self) { rb_thread_t *th = GET_THREAD(); - rb_thread_t *targe_th = rb_thread_ptr(self); + rb_thread_t *target_th = rb_thread_ptr(self); rb_vm_t *vm = th->vm; - targe_th->vm = vm; - th_init(targe_th, self); + target_th->vm = vm; + th_init(target_th, self); + + target_th->top_wrapper = 0; + target_th->top_self = rb_vm_top_self(); + target_th->ec->root_svar = Qfalse; + target_th->ractor = th->ractor; - targe_th->top_wrapper = 0; - targe_th->top_self = rb_vm_top_self(); - targe_th->ec->root_svar = Qfalse; - targe_th->ractor = th->ractor; return self; } -- cgit v1.1 -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/