[前][次][番号順一覧][スレッド一覧]

ruby-changes:39495

From: normal <ko1@a...>
Date: Fri, 14 Aug 2015 18:44:26 +0900 (JST)
Subject: [ruby-changes:39495] normal:r51576 (trunk): improve handling of timer thread shutdown

normal	2015-08-14 18:44:10 +0900 (Fri, 14 Aug 2015)

  New Revision: 51576

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=51576

  Log:
    improve handling of timer thread shutdown
    
    Shutting down the timer thread now always closes pipes to free FDs.
    In fact, we close the write ends of the pipes is done in the main
    RubyVM to signal the timer thread shutdown.
    
    To effectively close pipes, we implement userspace locks via
    atomics to force the pipe closing thread to wait on any signal
    handlers which may be waking up.
    
    While we're at it, improve robustness during resource exhaustion and
    allow it to limp along non-fatally if restarting a timer thread
    fails.
    
    This reverts r51268
    
    Note: this change is tested with VM_CHECK_MODE 1 in vm_core.h
    
    * process.c (close_unless_reserved): add extra check
      (dup2_with_divert): remove
      (redirect_dup2): use dup2 without divert
      (before_exec_non_async_signal_safe): adjust call + comment
      (rb_f_exec): stop timer thread for all OSes
      (rb_exec_without_timer_thread): remove
    * eval.c (ruby_cleanup): adjust call
    * thread.c (rb_thread_stop_timer_thread): always close pipes
    * thread_pthread.c (struct timer_thread_pipe): add writing field,
        mark owner_process volatile for signal handlers
      (rb_thread_wakeup_timer_thread_fd): check valid FD
      (rb_thread_wakeup_timer_thread): set writing flag to prevent close
      (rb_thread_wakeup_timer_thread_low): ditto
      (CLOSE_INVALIDATE): new macro
      (close_invalidate): new function
      (close_communication_pipe): removed
      (setup_communication_pipe_internal): make errors non-fatal
      (setup_communication_pipe): ditto
      (thread_timer): close reading ends inside timer thread
      (rb_thread_create_timer_thread): make errors non-fatal
      (native_stop_timer_thread): close write ends only, always,
       wait for signal handlers to finish
      (rb_divert_reserved_fd): remove
    * thread_win32.c (native_stop_timer_thread): adjust (untested)
      (rb_divert_reserved_fd): remove
    * vm_core.h: adjust prototype

  Modified files:
    trunk/ChangeLog
    trunk/eval.c
    trunk/process.c
    trunk/thread.c
    trunk/thread_pthread.c
    trunk/thread_win32.c
    trunk/vm_core.h
Index: thread_win32.c
===================================================================
--- thread_win32.c	(revision 51575)
+++ thread_win32.c	(revision 51576)
@@ -730,7 +730,7 @@ rb_thread_create_timer_thread(void) https://github.com/ruby/ruby/blob/trunk/thread_win32.c#L730
 }
 
 static int
-native_stop_timer_thread(int close_anyway)
+native_stop_timer_thread(void)
 {
     int stopped = --system_working <= 0;
     if (stopped) {
@@ -787,12 +787,6 @@ rb_reserved_fd_p(int fd) https://github.com/ruby/ruby/blob/trunk/thread_win32.c#L787
 {
     return 0;
 }
-
-int
-rb_divert_reserved_fd(int fd)
-{
-    return 0;
-}
 
 rb_nativethread_id_t
 rb_nativethread_self(void)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 51575)
+++ ChangeLog	(revision 51576)
@@ -1,3 +1,32 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Fri Aug 14 18:43:11 2015  Eric Wong  <e@8...>
+
+	* process.c (close_unless_reserved): add extra check
+	  (dup2_with_divert): remove
+	  (redirect_dup2): use dup2 without divert
+	  (before_exec_non_async_signal_safe): adjust call + comment
+	  (rb_f_exec): stop timer thread for all OSes
+	  (rb_exec_without_timer_thread): remove
+	* eval.c (ruby_cleanup): adjust call
+	* thread.c (rb_thread_stop_timer_thread): always close pipes
+	* thread_pthread.c (struct timer_thread_pipe): add writing field,
+	  mark owner_process volatile for signal handlers
+	  (rb_thread_wakeup_timer_thread_fd): check valid FD
+	  (rb_thread_wakeup_timer_thread): set writing flag to prevent close
+	  (rb_thread_wakeup_timer_thread_low): ditto
+	  (CLOSE_INVALIDATE): new macro
+	  (close_invalidate): new function
+	  (close_communication_pipe): removed
+	  (setup_communication_pipe_internal): make errors non-fatal
+	  (setup_communication_pipe): ditto
+	  (thread_timer): close reading ends inside timer thread
+	  (rb_thread_create_timer_thread): make errors non-fatal
+	  (native_stop_timer_thread): close write ends only, always,
+	   wait for signal handlers to finish
+	  (rb_divert_reserved_fd): remove
+	* thread_win32.c (native_stop_timer_thread): adjust (untested)
+	  (rb_divert_reserved_fd): remove
+	* vm_core.h: adjust prototype
+
 Fri Aug 14 18:40:43 2015  Nobuyoshi Nakada  <nobu@r...>
 
 	* ext/win32/lib/win32/registry.rb (API#SetValue): add terminator
Index: thread_pthread.c
===================================================================
--- thread_pthread.c	(revision 51575)
+++ thread_pthread.c	(revision 51576)
@@ -1270,9 +1270,16 @@ static int check_signal_thread_list(void https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1270
 
 #if USE_SLEEPY_TIMER_THREAD
 static struct {
+    /*
+     * Read end of each pipe is closed inside timer thread for shutdown
+     * Write ends are closed by a normal Ruby thread during shutdown
+     */
     int normal[2];
     int low[2];
-    rb_pid_t owner_process;
+
+    /* volatile for signal handler use: */
+    volatile rb_pid_t owner_process;
+    rb_atomic_t writing;
 } timer_thread_pipe = {
     {-1, -1},
     {-1, -1}, /* low priority */
@@ -1280,12 +1287,13 @@ static struct { https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1287
 
 /* only use signal-safe system calls here */
 static void
-rb_thread_wakeup_timer_thread_fd(int fd)
+rb_thread_wakeup_timer_thread_fd(volatile int *fdp)
 {
     ssize_t result;
+    int fd = *fdp; /* access fdp exactly once here and do not reread fdp */
 
     /* already opened */
-    if (timer_thread_pipe.owner_process == getpid()) {
+    if (fd >= 0 && timer_thread_pipe.owner_process == getpid()) {
 	const char *buff = "!";
       retry:
 	if ((result = write(fd, buff, 1)) <= 0) {
@@ -1311,13 +1319,18 @@ rb_thread_wakeup_timer_thread_fd(int fd) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1319
 void
 rb_thread_wakeup_timer_thread(void)
 {
-    rb_thread_wakeup_timer_thread_fd(timer_thread_pipe.normal[1]);
+    /* must be safe inside sighandler, so no mutex */
+    ATOMIC_INC(timer_thread_pipe.writing);
+    rb_thread_wakeup_timer_thread_fd(&timer_thread_pipe.normal[1]);
+    ATOMIC_DEC(timer_thread_pipe.writing);
 }
 
 static void
 rb_thread_wakeup_timer_thread_low(void)
 {
-    rb_thread_wakeup_timer_thread_fd(timer_thread_pipe.low[1]);
+    ATOMIC_INC(timer_thread_pipe.writing);
+    rb_thread_wakeup_timer_thread_fd(&timer_thread_pipe.low[1]);
+    ATOMIC_DEC(timer_thread_pipe.writing);
 }
 
 /* VM-dependent API is not available for this function */
@@ -1351,16 +1364,16 @@ consume_communication_pipe(int fd) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1364
     }
 }
 
+#define CLOSE_INVALIDATE(expr) close_invalidate(&expr,#expr)
 static void
-close_communication_pipe(int pipes[2])
+close_invalidate(volatile int *fdp, const char *msg)
 {
-    if (close(pipes[0]) < 0) {
-	rb_bug_errno("native_stop_timer_thread - close(ttp[0])", errno);
-    }
-    if (close(pipes[1]) < 0) {
-	rb_bug_errno("native_stop_timer_thread - close(ttp[1])", errno);
+    int fd = *fdp; /* access fdp exactly once here and do not reread fdp */
+
+    *fdp = -1;
+    if (close(fd) < 0) {
+	rb_async_bug_errno(msg, errno);
     }
-    pipes[0] = pipes[1] = -1;
 }
 
 static void
@@ -1378,39 +1391,47 @@ set_nonblock(int fd) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1391
 	rb_sys_fail(0);
 }
 
-static void
+static int
 setup_communication_pipe_internal(int pipes[2])
 {
     int err;
 
-    if (pipes[0] != -1) {
-	/* close pipe of parent process */
-	close_communication_pipe(pipes);
-    }
-
     err = rb_cloexec_pipe(pipes);
     if (err != 0) {
-	rb_bug_errno("setup_communication_pipe: Failed to create communication pipe for timer thread", errno);
+	rb_warn("Failed to create communication pipe for timer thread: %s",
+	        strerror(errno));
+	return -1;
     }
     rb_update_max_fd(pipes[0]);
     rb_update_max_fd(pipes[1]);
     set_nonblock(pipes[0]);
     set_nonblock(pipes[1]);
+    return 0;
 }
 
 /* communication pipe with timer thread and signal handler */
-static void
+static int
 setup_communication_pipe(void)
 {
-    if (timer_thread_pipe.owner_process == getpid()) {
-	/* already set up. */
-	return;
+    VM_ASSERT(timer_thread_pipe.owner_process == 0);
+    VM_ASSERT(timer_thread_pipe.normal[0] == -1);
+    VM_ASSERT(timer_thread_pipe.normal[1] == -1);
+    VM_ASSERT(timer_thread_pipe.low[0] == -1);
+    VM_ASSERT(timer_thread_pipe.low[1] == -1);
+
+    if (setup_communication_pipe_internal(timer_thread_pipe.normal) < 0) {
+	return errno;
+    }
+    if (setup_communication_pipe_internal(timer_thread_pipe.low) < 0) {
+	int e = errno;
+	CLOSE_INVALIDATE(timer_thread_pipe.normal[0]);
+	CLOSE_INVALIDATE(timer_thread_pipe.normal[1]);
+	return e;
     }
-    setup_communication_pipe_internal(timer_thread_pipe.normal);
-    setup_communication_pipe_internal(timer_thread_pipe.low);
 
     /* validate pipe on this process */
     timer_thread_pipe.owner_process = getpid();
+    return 0;
 }
 
 /**
@@ -1547,7 +1568,10 @@ thread_timer(void *p) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1568
         /* wait */
 	timer_thread_sleep(gvl);
     }
-#if !USE_SLEEPY_TIMER_THREAD
+#if USE_SLEEPY_TIMER_THREAD
+    CLOSE_INVALIDATE(timer_thread_pipe.normal[0]);
+    CLOSE_INVALIDATE(timer_thread_pipe.low[0]);
+#else
     native_mutex_unlock(&timer_thread_lock);
     native_cond_destroy(&timer_thread_cond);
     native_mutex_destroy(&timer_thread_lock);
@@ -1567,8 +1591,9 @@ rb_thread_create_timer_thread(void) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1591
 
 	err = pthread_attr_init(&attr);
 	if (err != 0) {
-	    fprintf(stderr, "[FATAL] Failed to initialize pthread attr: %s\n", strerror(err));
-	    exit(EXIT_FAILURE);
+	    rb_warn("pthread_attr_init failed for timer: %s, scheduling broken",
+		    strerror(err));
+	    return;
         }
 # ifdef PTHREAD_STACK_MIN
 	{
@@ -1586,7 +1611,12 @@ rb_thread_create_timer_thread(void) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1611
 #endif
 
 #if USE_SLEEPY_TIMER_THREAD
-	setup_communication_pipe();
+	err = setup_communication_pipe();
+	if (err != 0) {
+	    rb_warn("pipe creation failed for timer: %s, scheduling broken",
+		    strerror(err));
+	    return;
+	}
 #endif /* USE_SLEEPY_TIMER_THREAD */
 
 	/* create timer thread */
@@ -1599,8 +1629,13 @@ rb_thread_create_timer_thread(void) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1629
 	err = pthread_create(&timer_thread.id, NULL, thread_timer, &GET_VM()->gvl);
 #endif
 	if (err != 0) {
-	    fprintf(stderr, "[FATAL] Failed to create timer thread: %s\n", strerror(err));
-	    exit(EXIT_FAILURE);
+	    rb_warn("pthread_create failed for timer: %s, scheduling broken",
+		    strerror(err));
+	    CLOSE_INVALIDATE(timer_thread_pipe.normal[0]);
+	    CLOSE_INVALIDATE(timer_thread_pipe.normal[1]);
+	    CLOSE_INVALIDATE(timer_thread_pipe.low[0]);
+	    CLOSE_INVALIDATE(timer_thread_pipe.low[1]);
+	    return;
 	}
 	timer_thread.created = 1;
 #ifdef HAVE_PTHREAD_ATTR_INIT
@@ -1610,30 +1645,38 @@ rb_thread_create_timer_thread(void) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1645
 }
 
 static int
-native_stop_timer_thread(int close_anyway)
+native_stop_timer_thread(void)
 {
     int stopped;
     stopped = --system_working <= 0;
 
     if (TT_DEBUG) fprintf(stderr, "stop timer thread\n");
     if (stopped) {
-	/* join */
-	rb_thread_wakeup_timer_thread();
+	/* prevent wakeups from signal handler ASAP */
+	timer_thread_pipe.owner_process = 0;
+
+	/*
+	 * however, the above was not enough: the FD may already be
+	 * captured and in the middle of a write while we are running,
+	 * so wait for that to finish:
+	 */
+	while (ATOMIC_CAS(timer_thread_pipe.writing, 0, 0)) {
+	    native_thread_yield();
+	}
+
+	/* stop writing ends of pipes so timer thread notices EOF */
+	CLOSE_INVALIDATE(timer_thread_pipe.normal[1]);
+	CLOSE_INVALIDATE(timer_thread_pipe.low[1]);
+
+	/* timer thread will stop looping when system_working <= 0: */
 	native_thread_join(timer_thread.id);
+
+	/* timer thread will close the read end on exit: */
+	VM_ASSERT(timer_thread_pipe.normal[0] == -1);
+	VM_ASSERT(timer_thread_pipe.low[0] == -1);
+
 	if (TT_DEBUG) fprintf(stderr, "joined timer thread\n");
 	timer_thread.created = 0;
-
-	/* close communication pipe */
-	if (close_anyway) {
-	    /* TODO: Uninstall all signal handlers or mask all signals.
-	     *       This pass is cleaning phase (terminate ruby process).
-	     *       To avoid such race, we skip to close communication
-	     *       pipe.  OS will close it at process termination.
-	     *       It may not good practice, but pragmatic.
-	     *       We remain it is TODO.
-	     */
-	    /* close_communication_pipe(); */
-	}
     }
     return stopped;
 }
@@ -1707,29 +1750,6 @@ rb_reserved_fd_p(int fd) https://github.com/ruby/ruby/blob/trunk/thread_pthread.c#L1750
 #endif
 }
 
-int
-rb_divert_reserved_fd(int fd)
-{
-#if USE_SLEEPY_TIMER_THREAD
-    int *ptr;
-    int newfd;
-
-    if ((fd == *(ptr = &(timer_thread_pipe.normal[0])) ||
-         fd == *(ptr = &(timer_thread_pipe.normal[1])) ||
-         fd == *(ptr = &(timer_thread_pipe.low[0])) ||
-         fd == *(ptr = &(timer_thread_pipe.low[1]))) &&
-        timer_thread_pipe.owner_process == getpid()) { /* async-signal-safe */
-        newfd = rb_cloexec_dup(fd); /* async-signal-safe if no error */
-        if (newfd == -1) return -1;
-        rb_update_max_fd(newfd); /* async-signal-safe if no error */
-        /* set_nonblock(newfd); */ /* async-signal-safe if no error */
-        *ptr = newfd;
-        rb_thread_wakeup_timer_thread_low(); /* async-signal-safe? */
-    }
-#endif
-    return 0;
-}
-
 rb_nativethread_id_t
 rb_nativethread_self(void)
 {
Index: vm_core.h
===================================================================
--- vm_core.h	(revision 51575)
+++ vm_core.h	(revision 51576)
@@ -979,7 +979,7 @@ VALUE rb_vm_call(rb_thread_t *th, VALUE https://github.com/ruby/ruby/blob/trunk/vm_core.h#L979
 		 const VALUE *argv, const rb_callable_method_entry_t *me);
 
 void rb_thread_start_timer_thread(void);
-void rb_thread_stop_timer_thread(int);
+void rb_thread_stop_timer_thread(void);
 void rb_thread_reset_timer_thread(void);
 void rb_thread_wakeup_timer_thread(void);
 
Index: thread.c
===================================================================
--- thread.c	(revision 51575)
+++ thread.c	(revision 51576)
@@ -3864,9 +3864,9 @@ timer_thread_function(void *arg) https://github.com/ruby/ruby/blob/trunk/thread.c#L3864
 }
 
 void
-rb_thread_stop_timer_thread(int close_anyway)
+rb_thread_stop_timer_thread(void)
 {
-    if (TIMER_THREAD_CREATED_P() && native_stop_timer_thread(close_anyway)) {
+    if (TIMER_THREAD_CREATED_P() && native_stop_timer_thread()) {
 	native_reset_timer_thread();
     }
 }
Index: eval.c
===================================================================
--- eval.c	(revision 51575)
+++ eval.c	(revision 51576)
@@ -223,7 +223,7 @@ ruby_cleanup(volatile int ex) https://github.com/ruby/ruby/blob/trunk/eval.c#L223
     /* unlock again if finalizer took mutexes. */
     rb_threadptr_unlock_all_locking_mutexes(GET_THREAD());
     TH_POP_TAG();
-    rb_thread_stop_timer_thread(1);
+    rb_thread_stop_timer_thread();
     ruby_vm_destruct(GET_VM());
     if (state) ruby_default_signal(state);
 
Index: process.c
===================================================================
--- process.c	(revision 51575)
+++ process.c	(revision 51576)
@@ -297,24 +297,14 @@ extern ID ruby_static_id_status; https://github.com/ruby/ruby/blob/trunk/process.c#L297
 static inline int
 close_unless_reserved(int fd)
 {
-    /* Do nothing to the reserved fd because it should be closed in exec(2)
-       due to the O_CLOEXEC or FD_CLOEXEC flag. */
+    /* We should not have reserved FDs at this point */
     if (rb_reserved_fd_p(fd)) { /* async-signal-safe */
+        rb_async_bug_errno("BUG timer thread still running", 0 /* EDOOFUS */);
         return 0;
     }
     return close(fd); /* async-signal-safe */
 }
 
-static inline int
-dup2_with_divert(int oldfd, int newfd)
-{
-    if (rb_divert_reserved_fd(newfd) == -1) { /* async-signal-safe if no error occurred */
-        return -1;
-    } else {
-        return dup2(oldfd, newfd); /* async-signal-safe */
-    }
-}
-
 /*#define DEBUG_REDIRECT*/
 #if defined(DEBUG_REDIRECT)
 
@@ -354,7 +344,7 @@ static int https://github.com/ruby/ruby/blob/trunk/process.c#L344
 redirect_dup2(int oldfd, int newfd)
 {
     int ret;
-    ret = dup2_with_divert(oldfd, newfd);
+    ret = dup2(oldfd, newfd);
     ttyprintf("dup2(%d, %d) => %d\n", oldfd, newfd, ret);
     return ret;
 }
@@ -388,7 +378,7 @@ parent_redirect_close(int fd) https://github.com/ruby/ruby/blob/trunk/process.c#L378
 
 #else
 #define redirect_dup(oldfd) dup(oldfd)
-#define redirect_dup2(oldfd, newfd) dup2_with_divert((oldfd), (newfd))
+#define redirect_dup2(oldfd, newfd) dup2((oldfd), (newfd))
 #define redirect_close(fd) close_unless_reserved(fd)
 #define parent_redirect_open(pathname, flags, perm) rb_cloexec_open((pathname), (flags), (perm))
 #define parent_redirect_close(fd) close_unless_reserved(fd)
@@ -1151,8 +1141,10 @@ before_exec_non_async_signal_safe(void) https://github.com/ruby/ruby/blob/trunk/process.c#L1141
      * internal threads temporary. [ruby-core:10583]
      * This is also true on Haiku. It returns Errno::EPERM against exec()
      * in multiple threads.
+     *
+     * Nowadays, we always stop the timer thread completely to allow redirects.
      */
-    rb_thread_stop_timer_thread(0);
+    rb_thread_stop_timer_thread();
 }
 
 static void
@@ -2472,10 +2464,6 @@ rb_execarg_parent_end(VALUE execarg_obj) https://github.com/ruby/ruby/blob/trunk/process.c#L2464
     RB_GC_GUARD(execarg_obj);
 }
 
-#if defined(__APPLE__) || defined(__HAIKU__)
-static int rb_exec_without_timer_thread(const struct rb_execarg *eargp, char *errmsg, size_t errmsg_buflen);
-#endif
-
 /*
  *  call-seq:
  *     exec([env,] command... [,options])
@@ -2559,16 +2547,14 @@ rb_f_exec(int argc, const VALUE *argv) https://github.com/ruby/ruby/blob/trunk/process.c#L2547
 
     execarg_obj = rb_execarg_new(argc, argv, TRUE);
     eargp = rb_execarg_get(execarg_obj);
+    before_exec(); /* stop timer thread before redirects */
     rb_execarg_parent_start(execarg_obj);
     fail_str = eargp->use_shell ? eargp->invoke.sh.shell_script : eargp->invoke.cmd.command_name;
 
-#if defined(__APPLE__) || defined(__HAIKU__)
-    rb_exec_without_timer_thread(eargp, errmsg, sizeof(errmsg));
-#else
-    before_exec_async_signal_safe(); /* async-signal-safe */
     rb_exec_async_signal_safe(eargp, errmsg, sizeof(errmsg));
-    preserving_errno(after_exec_async_signal_safe()); /* async-signal-safe */
-#endif
+
+    preserving_errno(after_exec()); /* restart timer thread */
+
     RB_GC_GUARD(execarg_obj);
     if (errmsg[0])
         rb_sys_fail(errmsg);
@@ -3076,18 +3062,6 @@ failure: https://github.com/ruby/ruby/blob/trunk/process.c#L3062
     return -1;
 }
 
-#if defined(__APPLE__) || defined(__HAIKU__)
-static int
-rb_exec_without_timer_thread(const struct rb_execarg *eargp, char *errmsg, size_t errmsg_buflen)
-{
-    int ret;
-    before_exec();
-    ret = rb_exec_async_signal_safe(eargp, errmsg, errmsg_buflen); /* hopefully async-signal-safe */
-    preserving_errno(after_exec()); /* not async-signal-safe because it calls rb_thread_start_timer_thread.  */
-    return ret;
-}
-#endif
-
 #ifdef HAVE_WORKING_FORK
 /* This function should be async-signal-safe.  Hopefully it is. */
 static int

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]