MB-20822: When loading persisted failovers, remove any erroneous entries 02/67702/8
authorManu Dhundi <manu@couchbase.com>
Tue, 4 Oct 2016 20:31:33 +0000 (13:31 -0700)
committerDave Rigby <daver@couchbase.com>
Tue, 11 Oct 2016 07:51:36 +0000 (07:51 +0000)
Due to bugs in older releases (and also possibly in future releases),
we may end up storing wrong failover table on disk. Hence during
warmup while loading failover table from disk we must prune out any
wrong entries.

Also, in the past we have seen erroneous entries with vb_uuid == 0.
So we make sure that 0 is never generated as a vb_uuid for a new entry,
and prune out any loaded entry that has vb_uuid == 0.

Change-Id: I630cb7fb1ea9a711432be64f36924d04fcd5e361
Reviewed-on: http://review.couchbase.org/67702
Tested-by: buildbot <build@couchbase.com>
Reviewed-by: Dave Rigby <daver@couchbase.com>
docs/stats.org
src/failover-table.cc
src/failover-table.h
tests/ep_testsuite.cc
tests/module_tests/failover_table_test.cc

index 83856b0..cb1eda5 100644 (file)
@@ -495,6 +495,18 @@ The stats below are listed for each vbucket.
 | last_persisted_snap_end       | The last persisted snapshot end seqno for  |
 |                               | the vbucket                                |
 
+** vBucket failover stats
+
+| Stats                         | Description                                |
+| ------------------------------+--------------------------------------------|
+| num_entries                   | Number of entries in the failover table of |
+|                               | this vbucket                               |
+| num_erroneous_entries_erased  | Number of erroneous entries erased in the  |
+|                               | failover table of this vbucket             |
+| n:id                          | vb_uuid of nth failover entry in the       |
+|                               | failover table of this vbucket             |
+| n:seq                         | seqno of nth failover entry in the         |
+|                               | failover table of this vbucket             |
 
 ** Tap stats
 
index 2ba252a..086dfe8 100644 (file)
 #undef STATWRITER_NAMESPACE
 
 FailoverTable::FailoverTable(size_t capacity)
-    : max_entries(capacity), provider(true) {
+    : max_entries(capacity), erroneousEntriesErased(0), provider(true) {
     createEntry(0);
     cacheTableJSON();
 }
 
 FailoverTable::FailoverTable(const std::string& json, size_t capacity)
     : max_entries(capacity),
+      erroneousEntriesErased(0),
       provider(true) {
     if (!loadFromJSON(json)) {
         throw std::invalid_argument("FailoverTable(): unable to load from "
                 "JSON file '" + json + "'");
     }
+    sanitizeFailoverTable(); // prune erroneous entries loaded from the JSON
 }
 
 FailoverTable::~FailoverTable() { }
@@ -53,14 +55,24 @@ uint64_t FailoverTable::getLatestUUID() {
 
 void FailoverTable::createEntry(uint64_t high_seqno) {
     LockHolder lh(lock);
+
     // Our failover table represents only *our* branch of history.
     // We must remove branches we've diverged from.
+    // Entries that we remove here are not erroneous entries: a diverged
+    // branch is an expected result of node failure(s).
     table.remove_if([high_seqno](const failover_entry_t& e) {
                         return (e.by_seqno > high_seqno);
                     });
 
     failover_entry_t entry;
-    entry.vb_uuid = (provider.next() >> 16);
+    /* In the past we have seen erroneous entries in the failover table with
+       vb_uuid == 0, due to bugs in the code which read/wrote the failover
+       table from/to disk or due to some other unknown buggy code.
+       Hence we choose not to have 0 as a valid vb_uuid value. The loop below
+       regenerates the vb_uuid if the random generator produces 0. */
+    do {
+        entry.vb_uuid = (provider.next() >> 16);
+    } while(0 == entry.vb_uuid);
     entry.by_seqno = high_seqno;
     table.push_front(entry);
     latest_uuid = entry.vb_uuid;
@@ -223,6 +235,9 @@ void FailoverTable::addStats(const void* cookie, uint16_t vbid,
         char statname[80] = {0};
         checked_snprintf(statname, sizeof(statname), "vb_%d:num_entries", vbid);
         add_casted_stat(statname, table.size(), add_stat, cookie);
+        checked_snprintf(statname, sizeof(statname),
+                         "vb_%d:num_erroneous_entries_erased", vbid);
+        add_casted_stat(statname, table.size(), add_stat, cookie);
 
         table_t::iterator it;
         int entrycounter = 0;
@@ -357,3 +372,33 @@ void FailoverTable::adjustSnapshotRange(uint64_t start_seqno,
         snap_end_seqno = start_seqno;
     }
 }
+
+void FailoverTable::sanitizeFailoverTable()
+{
+    size_t intialTableSize = table.size();
+    for (auto itr = table.begin(); itr != table.end(); ) {
+        if (0 == itr->vb_uuid) {
+            /* 1. Prune entries with vb_uuid == 0. (From past experience we have
+                  seen erroneous entries mostly have vb_uuid == 0, hence we have
+                  chosen not to use 0 as valid vb_uuid) */
+            itr = table.erase(itr);
+            continue;
+        }
+        if (itr != table.begin()) {
+            auto prevItr = std::prev(itr);
+            if (itr->by_seqno > prevItr->by_seqno) {
+                /* 2. Prune any entry that has a by_seqno greater than by_seqno
+                      of prev entry. (Entries are pushed at the head of the
+                      table and must have seqno > seqno of following entries) */
+                itr = table.erase(itr);
+                continue;
+            }
+        }
+        ++itr;
+    }
+    erroneousEntriesErased += (intialTableSize - table.size());
+}
+
+size_t FailoverTable::getNumErroneousEntriesErased() const {
+    return erroneousEntriesErased; // accumulated by sanitizeFailoverTable()
+}
index 85a1de3..a7fbefc 100644 (file)
@@ -155,6 +155,14 @@ class FailoverTable {
      */
     size_t getNumEntries() const;
 
+    /**
+     * Returns total number of erroneous entries that were erased from the
+     * failover table.
+     *
+     * @return total number of erroneous entries erased
+     */
+    size_t getNumErroneousEntriesErased() const;
+
  private:
 
     bool loadFromJSON(cJSON *json);
@@ -174,9 +182,17 @@ class FailoverTable {
                              uint64_t &snap_start_seqno,
                              uint64_t &snap_end_seqno);
 
+    /**
+     * Remove any wrong entries in failover table
+     *
+     * called only in ctor, hence does not grab lock
+     */
+    void sanitizeFailoverTable();
+
     Mutex lock;
     table_t table;
     size_t max_entries;
+    size_t erroneousEntriesErased;
     Couchbase::RandomGenerator provider;
     std::string cachedTableJSON;
     AtomicValue<uint64_t> latest_uuid;
index 92f02ac..30a85a8 100644 (file)
@@ -6268,14 +6268,16 @@ static enum test_result test_mb19687_fixed(ENGINE_HANDLE* h,
             {
                 "vb_0:0:id",
                 "vb_0:0:seq",
-                "vb_0:num_entries"
+                "vb_0:num_entries",
+                "vb_0:num_erroneous_entries_erased"
             }
         },
         {"failovers",
             {
                 "vb_0:0:id",
                 "vb_0:0:seq",
-                "vb_0:num_entries"
+                "vb_0:num_entries",
+                "vb_0:num_erroneous_entries_erased"
             }
         },
         {"diskinfo",
index 6352e21..521fda8 100644 (file)
@@ -286,6 +286,25 @@ static void test_max_capacity() {
     cb_assert(table.getLatestEntry().by_seqno == max_seqno);
 }
 
+static void test_sanitize_failover_table()
+{
+    const int numErroneousEntries = 4, numCorrectEntries = 2;
+    std::string failover_json(/* Erroneous entry */
+                              "[{\"id\":0,\"seq\":0},"
+                              "{\"id\":1356861809263,\"seq\":100},"
+                              /* Erroneous entry */
+                              "{\"id\":227813077095126,\"seq\":200},"
+                              /* Erroneous entry */
+                              "{\"id\":227813077095128,\"seq\":300},"
+                              /* Erroneous entry */
+                              "{\"id\":0,\"seq\":50},"
+                              "{\"id\":160260368866392,\"seq\":0}]");
+    FailoverTable table(failover_json, 10 /* max_entries */);
+
+    cb_assert(numCorrectEntries == table.getNumEntries());
+    cb_assert(numErroneousEntries == table.getNumErroneousEntriesErased());
+}
+
 int main(int argc, char **argv) {
     (void)argc;
     (void)argv;
@@ -297,5 +316,6 @@ int main(int argc, char **argv) {
     test_edgetests_failover_log();
     test_add_entry();
     test_max_capacity();
+    test_sanitize_failover_table();
     return 0;
 }