MB-21617: Change CAS resolution to nanoseconds 08/69708/7
authorJim Walker <jim@couchbase.com>
Tue, 8 Nov 2016 11:15:58 +0000 (11:15 +0000)
committerDave Rigby <daver@couchbase.com>
Thu, 10 Nov 2016 07:26:48 +0000 (07:26 +0000)
This gives better compatibility with older datasets and a less
confusing experience if an old cluster did set_with_cas against
a new one.

The CAS is generated as a nanosecond value.

Drift is tracked though in microseconds to give a longer window before
we may overflow the counter. The config thresholds are also all ┬Ás
but converted to ns so we can correctly track drift.

A new max_cas_str is also added to assist the supportability of
LWW, so at a glance you can see what the max_cas means as a
date/time string.

Change-Id: I40fb89add968043aca01b1de103f62319d814a5c
Reviewed-on: http://review.couchbase.org/69708
Reviewed-by: Dave Rigby <daver@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
docs/stats.org
src/hlc.h
src/vbucket.h
src/vbucketmap.cc
src/vbucketmap.h
tests/ep_testsuite.cc
tests/ep_testsuite_xdcr.cc

index df5a0e4..51b07ae 100644 (file)
@@ -336,11 +336,13 @@ For introductory information on stats within Couchbase, start with the
 | ep_cursors_dropped                 | Number of cursors dropped by the       |
 |                                    | checkpoint remover                     |
 | ep_active_hlc_drift                | The total absolute drift for all active|
-|                                    | vbuckets.                              |
+|                                    | vbuckets. This is microsecond          |
+|                                    | granularity.                           |
 | ep_active_hlc_drift_count          | The number of updates applied to       |
 |                                    | ep_active_hlc_drift.                   |
 | ep_replica_hlc_drift               | The total absolute drift for all       |
-|                                    | replica vbucket.                       |
+|                                    | replica vbuckets. This is microsecond  |
+|                                    | granularity.                           |
 | ep_replica_hlc_drift_count         | The number of updates applied to       |
 |                                    | ep_replica_hlc_drift.                  |
 | ep_active_ahead_exceptions         | The total number of ahead exceptions   |
@@ -491,18 +493,23 @@ The stats below are listed for each vbucket.
 | uuid                          | The current vbucket uuid                   |
 | rollback_item_count           | Num of items rolled back                   |
 | max_cas                       | Maximum CAS of all items in the vbucket.   |
+|                               | This is a hybrid logical clock value in    |
+|                               | nanoseconds.                               |
+| max_cas_str                   | max_cas as a time stamp string (seconds    |
+|                               | since epoch).                              |
 | total_abs_drift               | The accumulated absolute drift for this    |
-|                               | vbucket's hybrid logical clock.            |
+|                               | vbucket's hybrid logical clock in          |
+|                               | microseconds.                              |
 | total_abs_drift_count         | The number of updates applied to           |
 |                               | total_abs_drift.                           |
 | drift_ahead_threshold_exceeded| The number of HLC updates that had a value |
 |                               | ahead of the local HLC and were over the   |
 |                               | drift_ahead_threshold.                     |
-| drift_ahead_threshold         | The ahead threshold in us.                 |
+| drift_ahead_threshold         | The ahead threshold in ns.                 |
 |drift_behind_threshold_exceeded| The number of HLC updates that had a value |
 |                               | behind the local HLC and were over the     |
 |                               | drift_behind_threshold.                    |
-| drift_behind_threshold        | The behind threshold in us.                |
+| drift_behind_threshold        | The behind threshold in ns.                |
 | logical_clock_ticks           | How many times this vbucket's HLC has      |
 |                               | returned logical clock ticks.              |
 
index a1d2695..2235abd 100644 (file)
--- a/src/hlc.h
+++ b/src/hlc.h
@@ -22,6 +22,8 @@
 #include "atomic.h"
 #include "statwriter.h"
 
+#include <platform/checked_snprintf.h>
+
 /*
  * HLC manages a hybrid logical clock for 'time' stamping events.
  * The class only implements the send logic of the HLC algorithm.
@@ -51,19 +53,22 @@ public:
     /*
      * @param initHLC a HLC value to start from
      * @param aheadThresholdAhead threshold a peer can be ahead before we
-     *        increment driftAheadExceeded
+     *        increment driftAheadExceeded. Expressed in us.
      * @param behindThresholdhreshold a peer can be ahead before we
-     *        increment driftBehindExceeded
+     *        increment driftBehindExceeded. Expressed in us.
      */
-    HLC(uint64_t initHLC, uint64_t aheadThreshold, uint64_t behindThreshold)
+    HLC(uint64_t initHLC,
+        std::chrono::microseconds aheadThreshold,
+        std::chrono::microseconds behindThreshold)
         : maxHLC(initHLC),
           cummulativeDrift(0),
           cummulativeDriftIncrements(0),
           logicalClockTicks(0),
           driftAheadExceeded(0),
-          driftBehindExceeded(0),
-          driftAheadThreshold(aheadThreshold),
-          driftBehindThreshold(behindThreshold) {}
+          driftBehindExceeded(0) {
+        setDriftAheadThreshold(aheadThreshold);
+        setDriftBehindThreshold(behindThreshold);
+    }
 
     uint64_t nextHLC() {
         // Create a monotonic timestamp using part of the HLC algorithm by.
@@ -89,8 +94,12 @@ public:
         // Track the +/- difference between our time and their time
         int64_t difference = getMasked48(hlc) - timeNow;
 
-        // Accumulate the absolute drift
-        cummulativeDrift += std::abs(difference);
+        // Accumulate the absolute drift in microseconds not nanoseconds.
+        // E.g. 5s drift then has ~3.6 trillion updates before overflow vs 3.6
+        // million if we tracked in nanoseconds.
+        const auto ns = std::chrono::nanoseconds(std::abs(difference));
+        cummulativeDrift +=
+            std::chrono::duration_cast<std::chrono::microseconds>(ns).count();
         cummulativeDriftIncrements++;
 
         // If the difference is greater, count peer ahead exeception
@@ -126,23 +135,63 @@ public:
         return {driftAheadExceeded, driftBehindExceeded};
     }
 
-    void setDriftAheadThreshold(uint64_t threshold) {
-        driftAheadThreshold = threshold;
+    /*
+     * Set the drift threshold for ahead exception counting
+     * - externally we work in microseconds
+     * - internally we work in nanoseconds
+     */
+    void setDriftAheadThreshold(std::chrono::microseconds threshold) {
+        driftAheadThreshold =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(threshold).count();
     }
 
-    void setDriftBehindThreshold(uint64_t threshold) {
-        driftBehindThreshold = threshold;
+    /*
+     * Set the drift threshold for behind exception counting
+     * - externally we work in (u) microseconds
+     * - internally we work in nanoseconds
+     */
+    void setDriftBehindThreshold(std::chrono::microseconds threshold) {
+        driftBehindThreshold =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(threshold).count();
     }
 
     void addStats(const std::string& prefix, ADD_STAT add_stat, const void *c) const {
-        add_prefixed_stat(prefix.data(), "max_cas", getMaxHLC(), add_stat, c);
+        auto maxCas = getMaxHLC();
+        add_prefixed_stat(prefix.data(), "max_cas", maxCas, add_stat, c);
+
+        // Print max_cas as a UTC human readable string
+        auto nanoseconds = std::chrono::nanoseconds(maxCas);//duration
+        auto seconds = std::chrono::duration_cast<std::chrono::seconds>(nanoseconds);
+        time_t maxCasSeconds = seconds.count();
+
+        std::tm tm;
+        char timeString[100];
+        // Print as an ISO-8601 date format with nanosecond fractional part
+        if (cb_gmtime_r(&maxCasSeconds, &tm) == 0 &&
+            strftime(timeString, sizeof(timeString), "%Y-%m-%dT%H:%M:%S", &tm)) {
+            // Get the fractional nanosecond part
+            nanoseconds -= seconds;
+            try {
+                checked_snprintf(timeString, sizeof(timeString), "%s.%lld",
+                                 timeString, nanoseconds.count());
+            } catch (...) { /* genuinely do nothing, we will just see that the
+                        fraction is missing and max_cas is already printed */
+            }
+            add_prefixed_stat(prefix.data(), "max_cas_str", timeString, add_stat, c);
+        } else {
+            add_prefixed_stat(prefix.data(), "max_cas_str", "could not get string", add_stat, c);
+        }
+
         add_prefixed_stat(prefix.data(), "total_abs_drift", cummulativeDrift.load(), add_stat, c);
         add_prefixed_stat(prefix.data(), "total_abs_drift_count", cummulativeDriftIncrements.load(), add_stat, c);
         add_prefixed_stat(prefix.data(), "drift_ahead_threshold_exceeded", driftAheadExceeded.load(), add_stat, c);
-        add_prefixed_stat(prefix.data(), "drift_ahead_threshold", driftAheadThreshold.load(), add_stat, c);
         add_prefixed_stat(prefix.data(), "drift_behind_threshold_exceeded", driftBehindExceeded.load(), add_stat, c);
-        add_prefixed_stat(prefix.data(), "drift_behind_threshold", driftBehindThreshold.load(), add_stat, c);
         add_prefixed_stat(prefix.data(), "logical_clock_ticks", logicalClockTicks.load(), add_stat, c);
+
+        // These are printed "as is" so we know what is being compared. Do not convert to microseconds
+        add_prefixed_stat(prefix.data(), "drift_ahead_threshold", driftAheadThreshold.load(), add_stat, c);
+        add_prefixed_stat(prefix.data(), "drift_behind_threshold", driftBehindThreshold.load(), add_stat, c);
+
     }
 
     void resetStats() {
@@ -164,7 +213,7 @@ private:
 
     static int64_t getTime() {
         auto now = std::chrono::system_clock::now();
-        return std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
+        return std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();
     }
 
     /*
index bec3b5f..522fd04 100644 (file)
@@ -181,8 +181,8 @@ public:
         tempFilter(NULL),
         rollbackItemCount(0),
         hlc(maxCas,
-            config.getHlcAheadThresholdUs(),
-            config.getHlcBehindThresholdUs()),
+            std::chrono::microseconds(config.getHlcAheadThresholdUs()),
+            std::chrono::microseconds(config.getHlcBehindThresholdUs())),
         statPrefix("vb_" + std::to_string(i))
     {
         backfill.isBackfillPhase = false;
@@ -259,11 +259,11 @@ public:
         return hlc.getDriftExceptionCounters();
     }
 
-    void setHLCDriftAheadThreshold(uint64_t threshold) {
+    void setHLCDriftAheadThreshold(std::chrono::microseconds threshold) {
         hlc.setDriftAheadThreshold(threshold);
     }
 
-    void setHLCDriftBehindThreshold(uint64_t threshold) {
+    void setHLCDriftBehindThreshold(std::chrono::microseconds threshold) {
         hlc.setDriftBehindThreshold(threshold);
     }
 
index 1f3bf0d..cff9d3a 100644 (file)
@@ -201,7 +201,7 @@ size_t VBucketMap::getNumShards() const {
     return shards.size();
 }
 
-void VBucketMap::setHLCDriftAheadThreshold(uint64_t threshold) {
+void VBucketMap::setHLCDriftAheadThreshold(std::chrono::microseconds threshold) {
     for (id_type id = 0; id < size; id++) {
         auto vb = getBucket(id);
         if (vb) {
@@ -210,7 +210,7 @@ void VBucketMap::setHLCDriftAheadThreshold(uint64_t threshold) {
     }
 }
 
-void VBucketMap::setHLCDriftBehindThreshold(uint64_t threshold) {
+void VBucketMap::setHLCDriftBehindThreshold(std::chrono::microseconds threshold) {
     for (id_type id = 0; id < size; id++) {
         auto vb = getBucket(id);
         if (vb) {
@@ -222,8 +222,8 @@ void VBucketMap::setHLCDriftBehindThreshold(uint64_t threshold) {
 void VBucketMap::VBucketConfigChangeListener::sizeValueChanged(const std::string &key,
                                                    size_t value) {
     if (key == "hlc_drift_ahead_threshold_us") {
-        map.setHLCDriftAheadThreshold(value);
+        map.setHLCDriftAheadThreshold(std::chrono::microseconds(value));
     } else if (key == "hlc_drift_behind_threshold_us") {
-        map.setHLCDriftBehindThreshold(value);
+        map.setHLCDriftBehindThreshold(std::chrono::microseconds(value));
     }
 }
\ No newline at end of file
index 0eb128c..27d67ca 100644 (file)
@@ -74,8 +74,8 @@ public:
     KVShard* getShardByVbId(id_type id) const;
     KVShard* getShard(KVShard::id_type shardId) const;
     size_t getNumShards() const;
-    void setHLCDriftAheadThreshold(uint64_t threshold);
-    void setHLCDriftBehindThreshold(uint64_t threshold);
+    void setHLCDriftAheadThreshold(std::chrono::microseconds threshold);
+    void setHLCDriftBehindThreshold(std::chrono::microseconds threshold);
 
     // Returns the current state of the given vBucket.
     vbucket_state getVBucketState(VBucket::id_type id) const;
index 5468006..b40852c 100644 (file)
@@ -5900,6 +5900,7 @@ static enum test_result test_mb19687_fixed(ENGINE_HANDLE* h,
                 "vb_0:ht_memory",
                 "vb_0:logical_clock_ticks",
                 "vb_0:max_cas",
+                "vb_0:max_cas_str",
                 "vb_0:num_ejects",
                 "vb_0:num_items",
                 "vb_0:num_non_resident",
index 5500492..b6f8699 100644 (file)
@@ -1573,7 +1573,9 @@ static enum test_result test_set_with_meta_and_check_drift_stats(ENGINE_HANDLE *
     const int behindVb = n_vbuckets/3;
     checkne(aheadVb, behindVb, "Cannot have the same VB as ahead/behind");
 
-    HLC hlc(0/*init HLC*/, 0/*ahead threshold*/, 0/*behind threshold*/);
+    HLC hlc(0/*init HLC*/,
+            std::chrono::microseconds(0)/*ahead threshold*/,
+            std::chrono::microseconds(0)/*behind threshold*/);
 
     // grab the drift behind threshold
     uint64_t driftBehindThreshold = get_ull_stat(h, h1,
@@ -1655,7 +1657,9 @@ static enum test_result test_del_with_meta_and_check_drift_stats(ENGINE_HANDLE *
     const int behindVb = n_vbuckets/3;
     checkne(aheadVb, behindVb, "Cannot have the same VB as ahead/behind");
 
-    HLC hlc(0/*init HLC*/, 0/*ahead threshold*/, 0/*behind threshold*/);
+    HLC hlc(0/*init HLC*/,
+            std::chrono::microseconds(0)/*ahead threshold*/,
+            std::chrono::microseconds(0)/*behind threshold*/);
 
     // grab the drift behind threshold
     uint64_t driftBehindThreshold = get_ull_stat(h, h1,