MB-19222: Fix race condition in TaskQueue shutdown 11/62911/4
authorDave Rigby <daver@couchbase.com>
Wed, 12 Nov 2014 14:58:28 +0000 (14:58 +0000)
committerChiyoung Seo <chiyoung@couchbase.com>
Sat, 23 Apr 2016 00:49:32 +0000 (00:49 +0000)
There is a bug in the use of ExecutorThread.state when sleeping a
TaskQueue - TaskQueue::_doSleep() doesn't atomically transition the
state from RUNNING -> SLEEPING. This can cause a deadlock when
shutting down a ExecutorThread:

    Thread A:                           Thread B:
    --------------------------------    ------------------------------
    if (t.state == RUNNING) {  // true
                                        t.state = SHUTDOWN
        t.state = SLEEPING              cb_join_thread(Thread A)
                                        // wait forever
    ...
    if (t.state == SHUTDOWN) { // FALSE
      exit(0) // NEVER REACHED
    }

Fix by changing ExecutorThread.state to be an AtomicValue, and use
compare-and-exchange to move from RUNNING -> SLEEPING (and SLEEPING ->
RUNNING).

Change-Id: I9fab90a83978ae2aa6a0dcdd3b079a1c2f369402
Reviewed-on: http://review.couchbase.org/62911
Well-Formed: buildbot <build@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
Reviewed-by: Will Gardner <will.gardner@couchbase.com>
src/executorthread.cc
src/executorthread.h
src/taskqueue.cc

index 19dc788..2d20c6b 100644 (file)
@@ -174,7 +174,7 @@ void ExecutorThread::addLogEntry(const std::string &desc,
 }
 
 const std::string ExecutorThread::getStateName() {
-    switch (state) {
+    switch (state.load()) {
     case EXECUTOR_CREATING:
         return std::string("creating");
     case EXECUTOR_RUNNING:
index 1b6066f..cd45007 100644 (file)
@@ -123,7 +123,7 @@ private:
     ExecutorPool *manager;
     int startIndex;
     const std::string name;
-    volatile executor_state_t state;
+    AtomicValue<executor_state_t> state;
 
     struct  timeval    now;  // record of current time
     struct timeval waketime; // set to the earliest
index 26dbac9..03bf90a 100644 (file)
@@ -62,9 +62,11 @@ void TaskQueue::_doWake_UNLOCKED(size_t &numToWake) {
 bool TaskQueue::_doSleep(ExecutorThread &t) {
     gettimeofday(&t.now, NULL);
     if (less_tv(t.now, t.waketime) && manager->trySleep(queueType)) {
-        if (t.state == EXECUTOR_RUNNING) {
-            t.state = EXECUTOR_SLEEPING;
-        } else {
+        // Atomically switch from running to sleeping; iff we were previously
+        // running.
+        executor_state_t expected_state = EXECUTOR_RUNNING;
+        if (!t.state.compare_exchange_strong(expected_state,
+                                             EXECUTOR_SLEEPING)) {
             return false;
         }
         sleepers++;
@@ -80,9 +82,11 @@ bool TaskQueue::_doSleep(ExecutorThread &t) {
         sleepers--;
         manager->woke();
 
-        if (t.state == EXECUTOR_SLEEPING) {
-            t.state = EXECUTOR_RUNNING;
-        } else {
+        // Finished our sleep, atomically switch back to running iff we were
+        // previously sleeping.
+        expected_state = EXECUTOR_SLEEPING;
+        if (!t.state.compare_exchange_strong(expected_state,
+                                             EXECUTOR_RUNNING)) {
             return false;
         }
         gettimeofday(&t.now, NULL);