MB-20751: Fix lock cycle (deadlock) during bucket delete & client disconnect

author    Dave Rigby <daver@couchbase.com>  Thu, 1 Sep 2016 15:30:41 +0000 (16:30 +0100)
committer Dave Rigby <daver@couchbase.com>  Thu, 1 Sep 2016 21:09:29 +0000 (21:09 +0000)

MB-20716 recently fixed an issue where idle DCP connections (in
EWOULDBLOCK state) would not be notified (woken up) in the frontend
when a bucket was deleted. The fix for this was to trigger a notify
(via producer->notifyPaused()) as part of ep-engine bucket delete.

Unfortunately this introduced a lock-order inversion (deadlock)
between two mutexes, which caused memcached to hang during shutdown,
as one (or more) worker threads would never terminate.

The issue is between:

1. Frontend_worker thread mutex (threadLock)
2. ConnMap::connsLock

And at least two threads (although normally 3 in the wild):

T1: Frontend worker thread
T2: DestroyBucket thread
(optional T3: A NONIO thread running ConnManager)

During bucket delete, the following sequence occurs, creating a cycle
between threadLock and connsLock (a standalone sketch of the
inversion follows the sequence):

T1<Worker>:
    event_handler() ... conn_pending_close()
      -> LOCK(threadLock)
    DcpConnMap::disconnect()
      -> ACQUIRE(connsLock)

T2<DeleteBucket>:
    EventuallyPersistentEngine::handleDeleteBucket() ...
    DcpConnMap::shutdownAllConnections()
      -> LOCK(connsLock)
    notifyIOComplete() ... DcpProducer::notifyPaused()
      -> ACQUIRE(threadLock)
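
For illustration only, the same inversion reduced to a standalone
sketch (the names below are placeholders, not the actual ep-engine
types):

    #include <mutex>
    #include <thread>

    std::mutex threadLock;  // stands in for the worker thread's mutex
    std::mutex connsLock;   // stands in for DcpConnMap::connsLock

    void worker_disconnect() {  // T1: conn_pending_close() path
        std::lock_guard<std::mutex> t(threadLock);
        std::lock_guard<std::mutex> c(connsLock);   // blocks if T2 holds connsLock
    }

    void delete_bucket() {      // T2: shutdownAllConnections() path
        std::lock_guard<std::mutex> c(connsLock);
        std::lock_guard<std::mutex> t(threadLock);  // blocks if T1 holds threadLock
    }

    int main() {
        std::thread t1(worker_disconnect);
        std::thread t2(delete_bucket);
        t1.join();  // may never return: classic ABBA lock-order inversion
        t2.join();
    }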

Part of the issue here is that DcpProducer::notifyPaused() *must* be
called with schedule==false: the ConnNotifier task (which runs on
another thread, never acquires connsLock and hence avoids any
deadlock) has already been shut down earlier in
DcpConnMap::shutdownAllConnections, so it is no longer available.
Therefore we must use the variant of notifyPaused which calls
notify_IO_complete on the same thread.
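
Sketched schematically (illustrative stand-ins only, not the real
ConnMap/DcpProducer code):

    #include <mutex>

    std::mutex threadLock;   // the front-end worker's mutex, as above

    void notifyViaConnNotifier() {
        // schedule==true: the ConnNotifier task wakes the cookie later,
        // from a thread that never takes connsLock -- no cycle possible.
        // (That task has already been stopped by the time we get here.)
    }

    void notifyOnCallingThread() {
        // schedule==false: wake the cookie now; this needs the worker's
        // threadLock, which is the edge that closes the cycle if the
        // caller still holds connsLock.
        std::lock_guard<std::mutex> t(threadLock);
    }

    void notifyPausedSketch(bool schedule) {
        if (schedule) {
            notifyViaConnNotifier();
        } else {
            notifyOnCallingThread();
        }
    }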

The solution chosen is essentially to drop the connsLock in
shutdownAllConnections before calling notify. We achieve this by
taking a _copy_ of the connections map (under connsLock), and then
iterating over this copy and calling notify etc. This is safe as the
elements of the map are all ref-counted pointers.
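
Reduced to a standalone sketch of the copy-then-iterate pattern
(generic names here; the actual change to shutdownAllConnections is
in the diff below):

    #include <map>
    #include <memory>
    #include <mutex>

    struct Connection {
        void notify() {}
    };

    std::mutex connsLock;
    std::map<const void*, std::shared_ptr<Connection>> map_;

    void shutdownAll() {
        // Copy the ref-counted pointers while holding the lock...
        std::map<const void*, std::shared_ptr<Connection>> copy;
        {
            std::lock_guard<std::mutex> lh(connsLock);
            copy = map_;
        }
        // ...then notify with the lock dropped: even if another thread
        // erases an entry from map_ concurrently, our shared_ptr copy
        // keeps the connection alive.
        for (auto& entry : copy) {
            entry.second->notify();
        }
    }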

Change-Id: I73f9b7576e42030a9f5219ae51e604e36fabcac7
Reviewed-on: http://review.couchbase.org/67252
Well-Formed: buildbot <build@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
Reviewed-by: Trond Norbye <trond.norbye@gmail.com>
src/connmap.cc
src/connmap.h
tests/module_tests/evp_store_test.cc

diff --git a/src/connmap.cc b/src/connmap.cc
index 13de1bb..6ca33cf 100644
--- a/src/connmap.cc
+++ b/src/connmap.cc
@@ -1087,11 +1087,20 @@ void DcpConnMap::shutdownAllConnections() {
         connNotifier_->stop();
     }
 
+    // Take a copy of the connection map (under lock), then iterate
+    // over the copy, closing all streams and cancelling any
+    // tasks.
+    // We do this so we don't hold the connsLock when calling
+    // notifyPaused() on producer streams, as that would create a lock
+    // cycle between connsLock, the worker thread lock and releaseLock.
+    CookieToConnectionMap mapCopy;
     {
         LockHolder lh(connsLock);
-        closeAllStreams_UNLOCKED();
-        cancelAllTasks_UNLOCKED();
+        mapCopy = map_;
     }
+
+    closeStreams(mapCopy);
+    cancelTasks(mapCopy);
 }
 
 void DcpConnMap::vbucketStateChanged(uint16_t vbucket, vbucket_state_t state,
@@ -1125,10 +1134,9 @@ bool DcpConnMap::handleSlowStream(uint16_t vbid,
     return false;
 }
 
-void DcpConnMap::closeAllStreams_UNLOCKED() {
-    std::map<const void*, connection_t>::iterator itr = map_.begin();
-    for (; itr != map_.end(); ++itr) {
-        DcpProducer* producer = dynamic_cast<DcpProducer*> (itr->second.get());
+void DcpConnMap::closeStreams(CookieToConnectionMap& map) {
+    for (auto itr : map) {
+        DcpProducer* producer = dynamic_cast<DcpProducer*> (itr.second.get());
         if (producer) {
             producer->closeAllStreams();
             producer->clearCheckpointProcessorTaskQueues();
@@ -1137,15 +1145,14 @@ void DcpConnMap::closeAllStreams_UNLOCKED() {
             // connection.
             producer->notifyPaused(/*schedule*/false);
         } else {
-            static_cast<DcpConsumer*>(itr->second.get())->closeAllStreams();
+            static_cast<DcpConsumer*>(itr.second.get())->closeAllStreams();
         }
     }
 }
 
-void DcpConnMap::cancelAllTasks_UNLOCKED() {
-    std::map<const void*, connection_t>::iterator itr = map_.begin();
-    for (; itr != map_.end(); ++itr) {
-        DcpConsumer* consumer = dynamic_cast<DcpConsumer*> (itr->second.get());
+void DcpConnMap::cancelTasks(CookieToConnectionMap& map) {
+    for (auto itr : map) {
+        DcpConsumer* consumer = dynamic_cast<DcpConsumer*> (itr.second.get());
         if (consumer) {
             consumer->cancelTask();
         }
diff --git a/src/connmap.h b/src/connmap.h
index 398a7e4..5981377 100644
--- a/src/connmap.h
+++ b/src/connmap.h
@@ -229,7 +229,8 @@ protected:
     // ConnHandler objects is guarded by {releaseLock}.
     Mutex                                    connsLock;
 
-    std::map<const void*, connection_t>      map_;
+    using CookieToConnectionMap = std::map<const void*, connection_t>;
+    CookieToConnectionMap map_;
     std::list<connection_t>                  all;
 
     SpinLock *vbConnLocks;
@@ -547,9 +548,15 @@ protected:
 
     bool isPassiveStreamConnected_UNLOCKED(uint16_t vbucket);
 
-    void closeAllStreams_UNLOCKED();
+    /*
+     * Closes all streams associated with each connection in `map`.
+     */
+    static void closeStreams(CookieToConnectionMap& map);
 
-    void cancelAllTasks_UNLOCKED();
+    /*
+     * Cancels all tasks associated with each connection in `map`.
+     */
+    static void cancelTasks(CookieToConnectionMap& map);
 
     SpinLock numBackfillsLock;
     /* Db file memory */
diff --git a/tests/module_tests/evp_store_test.cc b/tests/module_tests/evp_store_test.cc
index 8a62028..28d8b23 100644
--- a/tests/module_tests/evp_store_test.cc
+++ b/tests/module_tests/evp_store_test.cc
@@ -157,6 +157,46 @@ void EventuallyPersistentStoreTest::store_item(uint16_t vbid,
 }
 
 
+// Verify that when handling a bucket delete with open DCP
+// connections, we don't deadlock when notifying the front-end
+// connection.
+// This is a potential issue because notify_IO_complete
+// needs to lock the worker thread mutex the connection is assigned
+// to, to update the event list for that connection, which the worker
+// thread itself will have locked while it is running. Normally
+// deadlock is avoided by using a background thread (ConnNotifier),
+// which only calls notify_IO_complete and isn't involved with any
+// other mutexes; however, we cannot use that task as it gets shut down
+// during shutdownAllConnections.
+// This test requires ThreadSanitizer or similar to validate;
+// there's no guarantee we'll actually deadlock on any given run.
+TEST_F(EventuallyPersistentStoreTest, test_mb20751_deadlock_on_disconnect_delete) {
+
+    // Create a new Dcp producer, reserving its cookie.
+    get_mock_server_api()->cookie->reserve(cookie);
+    dcp_producer_t producer = engine->getDcpConnMap().newProducer(
+        cookie, "mb_20716r", /*notifyOnly*/false);
+
+    // Check preconditions.
+    EXPECT_TRUE(producer->isPaused());
+
+    // 1. To check that there's no potential data-race with the
+    //    concurrent connection disconnect on another thread
+    //    (simulating a front-end thread).
+    std::thread frontend_thread_handling_disconnect{[this](){
+            // Frontend thread always runs with the cookie locked, so
+            // lock here to match.
+            lock_mock_cookie(cookie);
+            engine->handleDisconnect(cookie);
+            unlock_mock_cookie(cookie);
+        }};
+
+    // 2. Trigger a bucket deletion.
+    engine->handleDeleteBucket(cookie);
+
+    frontend_thread_handling_disconnect.join();
+}
+
 class EPStoreEvictionTest : public EventuallyPersistentStoreTest,
                              public ::testing::WithParamInterface<std::string> {
     void SetUp() override {