MB-21769: Fix vbstate snap start/end off by one 40/70340/4
authorJim Walker <jim@couchbase.com>
Thu, 24 Nov 2016 10:41:25 +0000 (10:41 +0000)
committerDave Rigby <daver@couchbase.com>
Fri, 25 Nov 2016 07:56:34 +0000 (07:56 +0000)
Recent changes to vbstate writing introduced an edge case regression.
If we force shutdown and had empty active vbuckets, after warmup
we will expose an incorrect failover table as we have an entry
for seq 1, yet the VB has high-seq of 0.

Change-Id: Iee67f71ce46c8eaf4f2cd822103354dcdecc04d8
Reviewed-on: http://review.couchbase.org/70340
Reviewed-by: Dave Rigby <daver@couchbase.com>
Reviewed-by: Manu Dhundi <manu@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
src/ep.cc
tests/ep_test_apis.cc
tests/ep_test_apis.h
tests/ep_testsuite_basic.cc

index 4413776..a44a47a 100644 (file)
--- a/src/ep.cc
+++ b/src/ep.cc
@@ -3197,11 +3197,6 @@ int EventuallyPersistentStore::flushVBucket(uint16_t vbid) {
                     // no 'real' items in the checkpoint.
                     mustCheckpointVBState = true;
 
-                    // Update maxSeqno to ensure the snap {start,end} range
-                    // is correct if no other normal item is included in this
-                    // checkpoint.
-                    maxSeqno = std::max(maxSeqno, (uint64_t)item->getBySeqno());
-
                     // Update queuing stats how this item has logically been
                     // processed.
                     stats.decrDiskQueueSize(1);
index bbca8f4..9cc9c92 100644 (file)
@@ -1413,7 +1413,7 @@ void validate_store_resp(ENGINE_ERROR_CODE ret, int& num_items)
 
 void write_items(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *h1, int num_items,
                  int start_seqno, const char *key_prefix, const char *value,
-                 uint32_t expiry)
+                 uint32_t expiry, uint16_t vb)
 {
     int j = 0;
     while (1) {
@@ -1423,7 +1423,7 @@ void write_items(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *h1, int num_items,
         item *i = nullptr;
         std::string key(key_prefix + std::to_string(j + start_seqno));
         ENGINE_ERROR_CODE ret = store(h, h1, nullptr, OPERATION_SET,
-                                      key.c_str(), value, &i, /*cas*/0, /*vb*/0,
+                                      key.c_str(), value, &i, /*cas*/0, vb,
                                       expiry);
         h1->release(h, nullptr, i);
         validate_store_resp(ret, j);
index 6c5e2c0..6015e45 100644 (file)
@@ -423,11 +423,12 @@ void set_degraded_mode(ENGINE_HANDLE *h,
  * @param key_prefix Prefix for key names
  * @param value Value for each item
  * @param expiry Expiration time for each item.
+ * @param vb vbucket to use, default to 0
  */
 void write_items(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *h1,
                  int num_items, int start_seqno = 0,
                  const char *key_prefix = "key", const char *value = "data",
-                 uint32_t expiry = 0);
+                 uint32_t expiry = 0, uint16_t vb = 0);
 
 /* Helper function to write unique items starting from keyXX until memory usage
    hits "mem_thresh_perc" (XX is start_seqno) */
index 09a614b..dab44e2 100644 (file)
@@ -28,6 +28,7 @@
 #include <platform/cbassert.h>
 #include <JSON_checker.h>
 
+#include <array>
 
 #define WHITESPACE_DB "whitespace sucks.db"
 
@@ -2531,6 +2532,72 @@ static enum test_result set_max_cas_mb21190(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *
     return SUCCESS;
 }
 
+static enum test_result warmup_mb21769(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *h1) {
+    // Validate some VB data post warmup
+    // VB 0 will be empty
+    // VB 1 will not be empty
+    // VB 2 will not be empty and will have had set_state as the final ops
+
+    check(set_vbucket_state(h, h1, 1, vbucket_state_active),
+          "Failed to set vbucket state for vb1");
+    check(set_vbucket_state(h, h1, 2, vbucket_state_active),
+          "Failed to set vbucket state for vb2");
+
+    const int num_items = 10;
+    write_items(h, h1, num_items, 0, "vb1", "value", 0/*expiry*/, 1/*vb*/);
+    write_items(h, h1, num_items, 0, "vb2", "value", 0/*expiry*/, 2/*vb*/);
+    wait_for_flusher_to_settle(h, h1);
+
+    // flip replica to active to drive more _local writes
+    check(set_vbucket_state(h, h1, 2, vbucket_state_replica),
+          "Failed to set vbucket state (replica) for vb2");
+    wait_for_flusher_to_settle(h, h1);
+
+    check(set_vbucket_state(h, h1, 2, vbucket_state_active),
+          "Failed to set vbucket state (replica) for vb2");
+    wait_for_flusher_to_settle(h, h1);
+
+    // Force a shutdown so the warmup will create failover entries
+    testHarness.reload_engine(&h, &h1,
+                              testHarness.engine_path,
+                              testHarness.get_current_testcase()->cfg,
+                              true, true);
+
+    wait_for_warmup_complete(h, h1);
+
+    // values of interested stats for each VB
+    std::array<uint64_t, 3> high_seqnos = {{0, num_items, num_items}};
+    std::array<uint64_t, 3> snap_starts = {{0, num_items, num_items}};
+    std::array<uint64_t, 3> snap_ends = {{0, num_items, num_items}};
+    // we will check the seqno of the 0th entry of each vbucket's failover table
+    std::array<uint64_t, 3> failover_entry0 = {{0, num_items, num_items}};
+
+    for (uint64_t vb = 0; vb <= 2; vb++) {
+        std::string vb_prefix = "vb_" + std::to_string(vb) + ":";
+        std::string high_seqno = vb_prefix + "high_seqno";
+        std::string snap_start = vb_prefix + "last_persisted_snap_start";
+        std::string snap_end = vb_prefix + "last_persisted_snap_end";
+        std::string fail0 = vb_prefix + "0:seq";
+        std::string vb_group_key = "vbucket-seqno " + std::to_string(vb);
+        std::string failovers_key = "failovers " + std::to_string(vb);
+
+        checkeq(high_seqnos[vb],
+                get_ull_stat(h, h1, high_seqno.c_str(), vb_group_key.c_str()),
+                std::string("high_seqno incorrect vb:" + std::to_string(vb)).c_str());
+        checkeq(snap_starts[vb],
+                get_ull_stat(h, h1, snap_start.c_str(), vb_group_key.c_str()),
+                std::string("snap_start incorrect vb:" + std::to_string(vb)).c_str());
+        checkeq(snap_ends[vb],
+                get_ull_stat(h, h1, snap_end.c_str(), vb_group_key.c_str()),
+                std::string("snap_end incorrect vb:" + std::to_string(vb)).c_str());
+        checkeq(failover_entry0[vb],
+                get_ull_stat(h, h1, fail0.c_str(), failovers_key.c_str()),
+                std::string("failover table entry 0 is incorrect vb:" + std::to_string(vb)).c_str());
+    }
+
+    return SUCCESS;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Test manifest //////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
@@ -2676,6 +2743,8 @@ BaseTestCase testsuite_testcases[] = {
                  "ht_size=7;ht_locks=3", prepare, cleanup),
         TestCase("set max_cas MB21190", set_max_cas_mb21190, test_setup, teardown, nullptr,
                  prepare, cleanup),
+        TestCase("warmup_mb21769", warmup_mb21769, test_setup, teardown, nullptr,
+                 prepare, cleanup),
 
         // sentinel
         TestCase(NULL, NULL, NULL, NULL, NULL, prepare, cleanup)