[BP] MB-18580: Wait for VB state to be persisted before starting tests 98/65298/5
authorDave Rigby <daver@couchbase.com>
Wed, 4 May 2016 09:49:59 +0000 (10:49 +0100)
committerDave Rigby <daver@couchbase.com>
Mon, 4 Jul 2016 08:58:42 +0000 (08:58 +0000)
Intermittent test failures (across multiple tests) have been seen
where we fail to read the number of items in vbucket disk file:

    terminate called after throwing an instance of 'std::invalid_argument'
    what(): CouchKVStore::getDbFileInfo: Failed to open database file for vBucket = 1 rev = 1 with error:no such file

The issue is that we do not correctly wait for the vBucket files to be
created before starting a test. We /attempt/ to wait in test_setup,
waiting for ep_vb_snapshot_total to be non-zero, however this stat is
not updated when vBuckets are written to disk, instead only when the
vb state snapshot occurs.

To fix this, create a new histogram stat - ep_vb_persist_state_total -
which records how long the actual persist takes (and counts then at
the same time). Change test_setup to check for this stat becoming 1
before continuing.

Results in two new stats:

* disk_persist_vbstate - timing histogram of how long vbState
                          operations took.

* ep_persist_vbstate_total - count of how many VBStatePersists have
                             occurred.

Change-Id: Ic24e6cdb51a98ea6fa65005158242bfcf44225d0
Reviewed-on: http://review.couchbase.org/65298
Reviewed-by: Dave Rigby <daver@couchbase.com>
Well-Formed: buildbot <build@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
docs/stats.org
src/ep.cc
src/ep_engine.cc
src/stats.h
tests/ep_testsuite.cc

index 56ef5c6..0dc68aa 100644 (file)
@@ -326,21 +326,22 @@ For introductory information on stats within Couchbase, start with the
 
 ** vBucket total stats
 
-| Stat                          | Description                                |
-|-------------------------------+--------------------------------------------|
-| ep_vb_total                   | Total vBuckets (count)                     |
-| curr_items_tot                | Total number of items                      |
-| curr_items                    | Number of active items in memory           |
-| curr_temp_items               | Number of temporary items in memory        |
-| vb_dead_num                   | Number of dead vBuckets                    |
-| ep_diskqueue_items            | Total items in disk queue                  |
-| ep_diskqueue_memory           | Total memory used in disk queue            |
-| ep_diskqueue_fill             | Total enqueued items on disk queue         |
-| ep_diskqueue_drain            | Total drained items on disk queue          |
-| ep_diskqueue_pending          | Total bytes of pending writes              |
-| ep_vb_snapshot_total          | Total VB state snapshots persisted in disk |
-| ep_meta_data_memory           | Total memory used by meta data             |
-| ep_meta_data_disk             | Total disk used by meta data               |
+| Stat                     | Description                                    |
+|--------------------------+------------------------------------------------|
+| ep_vb_total              | Total vBuckets (count)                         |
+| curr_items_tot           | Total number of items                          |
+| curr_items               | Number of active items in memory               |
+| curr_temp_items          | Number of temporary items in memory            |
+| vb_dead_num              | Number of dead vBuckets                        |
+| ep_diskqueue_items       | Total items in disk queue                      |
+| ep_diskqueue_memory      | Total memory used in disk queue                |
+| ep_diskqueue_fill        | Total enqueued items on disk queue             |
+| ep_diskqueue_drain       | Total drained items on disk queue              |
+| ep_diskqueue_pending     | Total bytes of pending writes                  |
+| ep_vb_snapshot_total     | Number of VB state snapshots persisted to disk |
+| ep_persist_vbstate_total | Total VB persist state to disk                 |
+| ep_meta_data_memory      | Total memory used by meta data                 |
+| ep_meta_data_disk        | Total disk used by meta data                   |
 
 *** Active vBucket class stats
 
index 9902709..76bde3b 100644 (file)
--- a/src/ep.cc
+++ b/src/ep.cc
@@ -1135,6 +1135,8 @@ bool EventuallyPersistentStore::persistVBState(const Priority &priority,
         }
     }
 
+    const hrtime_t start = gethrtime();
+
     KVStatsCallback kvcb(this);
     uint64_t chkId = vbMap.getPersistenceCheckpointId(vbid);
     std::string failovers = vb->failovers->toJSON();
@@ -1148,6 +1150,7 @@ bool EventuallyPersistentStore::persistVBState(const Priority &priority,
 
     KVStore *rwUnderlying = getRWUnderlying(vbid);
     if (rwUnderlying->snapshotVBucket(vbid, vb_state, &kvcb)) {
+        stats.persistVBStateHisto.add((gethrtime() - start) / 1000);
         if (vbMap.setBucketCreation(vbid, false)) {
             LOG(EXTENSION_LOG_INFO, "VBucket %d created", vbid);
         }
index ea997ef..a822d1a 100644 (file)
@@ -3278,6 +3278,9 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::doEngineStats(const void *cookie,
     add_casted_stat("ep_vb_snapshot_total",
                     epstats.snapshotVbucketHisto.total(), add_stat, cookie);
 
+    add_casted_stat("ep_persist_vbstate_total",
+                    epstats.persistVBStateHisto.total(), add_stat, cookie);
+
     add_casted_stat("ep_vb_total",
                     activeCountVisitor.getVBucketNumber() +
                     replicaCountVisitor.getVBucketNumber() +
@@ -4205,6 +4208,8 @@ ENGINE_ERROR_CODE EventuallyPersistentEngine::doTimingStats(const void *cookie,
     add_casted_stat("disk_commit", stats.diskCommitHisto, add_stat, cookie);
     add_casted_stat("disk_vbstate_snapshot", stats.snapshotVbucketHisto,
                     add_stat, cookie);
+    add_casted_stat("disk_persist_vbstate", stats.persistVBStateHisto,
+                    add_stat, cookie);
 
     add_casted_stat("item_alloc_sizes", stats.itemAllocSizeHisto,
                     add_stat, cookie);
index 49e256e..2010d2c 100644 (file)
@@ -513,6 +513,9 @@ public:
     //! Histogram of setting vbucket state
     Histogram<hrtime_t> snapshotVbucketHisto;
 
+    //! Histogram of persisting vbucket state
+    Histogram<hrtime_t> persistVBStateHisto;
+
     //! Histogram of mutation log compactor
     Histogram<hrtime_t> mlogCompactorHisto;
 
@@ -591,7 +594,8 @@ public:
         diskDelHisto.reset();
         diskVBDelHisto.reset();
         diskCommitHisto.reset();
-
+        snapshotVbucketHisto.reset();
+        persistVBStateHisto.reset();
         itemAllocSizeHisto.reset();
         dirtyAgeHisto.reset();
         mlogCompactorHisto.reset();
index 3cbe08b..31055cf 100644 (file)
@@ -230,7 +230,9 @@ static bool test_setup(ENGINE_HANDLE *h, ENGINE_HANDLE_V1 *h1) {
               "Failed to set VB0 state.");
     }
 
-    wait_for_stat_change(h, h1, "ep_vb_snapshot_total", 0);
+    // Wait for vb0's state (active) to be persisted to disk, that way
+    // we know the KVStore files exist on disk.
+    wait_for_stat_to_be(h, h1, "ep_persist_vbstate_total", 1);
 
     // warmup is complete, notify ep engine that it must now enable
     // data traffic