MB-20623: Remove 'fetches' vector from MutationLogHarvester::apply 43/68043/7
authorDave Rigby <daver@couchbase.com>
Tue, 23 Aug 2016 11:12:05 +0000 (12:12 +0100)
committerDave Rigby <daver@couchbase.com>
Fri, 30 Sep 2016 15:07:05 +0000 (15:07 +0000)
MutationLogHarvester::apply() builds a vector of keys (`fetches`) to
fetch from disk. This is essentially identical to the contents of
`committed`, except it only contains keys which currently exist in the
VBucket.

We can optimize this by removing fetches, and instead erasing the
elements from `committed[vb]` which are no longer valid. This also
means we no longer have to check for duplicates in batchWarmupCallback
(which was kinda pointless even before), as the data structure passed
in is now a set (and hence cannot by definition contain dupes).

Results in a reduction in the memory used by these temporary warmup
data structures - from 3876MB to 3218MB (17%) for the following
workload:

* 1 bucket, 10,000MB quota, 1 node.
* 30M items, 300bytes each. Dataset generated by:
    cbc-pillowfight -U couchbase://localhost:12000 -I 30000000 -m 300 -M 300 -t16

Change-Id: I1c2e480e53626be41fa20dcd44abe4d6fa327e4c
Reviewed-on: http://review.couchbase.org/68043
Reviewed-by: Daniel Owen <owend@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
src/mutation_log.cc
src/mutation_log.h
src/warmup.cc

index 1a2d794..239117e 100644 (file)
@@ -914,23 +914,26 @@ void MutationLogHarvester::apply(void *arg, mlCallbackWithQueue mlc) {
         throw std::logic_error("MutationLogHarvester::apply: Cannot apply "
                 "when engine is NULL");
     }
-    std::vector<std::string> fetches;
     for (const uint16_t vb : vbid_set) {
         RCPtr<VBucket> vbucket = engine->getEpStore()->getVBucket(vb);
         if (!vbucket) {
             continue;
         }
-        for (const auto& key : committed[vb]) {
-            // Check item is a valid StoredValue in the HashTable before
-            // adding to fetches
-            if ((vbucket->ht.find(key, false) != nullptr)) {
-                fetches.push_back(key);
+
+        // Remove any items which are no longer valid in the VBucket.
+        for (auto it = committed[vb].begin(); it != committed[vb].end(); ) {
+            if ((vbucket->ht.find(*it, false) == nullptr)) {
+                it = committed[vb].erase(it);
+            }
+            else {
+                ++it;
             }
         }
-        if (!mlc(vb, fetches, arg)) {
+
+        if (!mlc(vb, committed[vb], arg)) {
             return;
         }
-        fetches.clear();
+        committed[vb].clear();
     }
 }
 
index 628fd7a..11199e1 100644 (file)
@@ -568,7 +568,7 @@ typedef std::pair<uint64_t, uint8_t> mutation_log_event_t;
  */
 typedef bool (*mlCallback)(void*, uint16_t, const std::string &);
 typedef bool (*mlCallbackWithQueue)(uint16_t,
-                                    std::vector<std::string> &,
+                                    const std::set<std::string>&,
                                     void *arg);
 
 /**
index f5bbbeb..d74c787 100644 (file)
@@ -45,7 +45,7 @@ struct WarmupCookie {
 };
 
 static bool batchWarmupCallback(uint16_t vbId,
-                                std::vector<std::string>& fetches,
+                                const std::set<std::string>& fetches,
                                 void *arg)
 {
     WarmupCookie *c = static_cast<WarmupCookie *>(arg);
@@ -53,10 +53,6 @@ static bool batchWarmupCallback(uint16_t vbId,
     if (!c->epstore->maybeEnableTraffic()) {
         vb_bgfetch_queue_t items2fetch;
         for (auto& key : fetches) {
-            // ignore duplicate keys, if any in access log
-            if (items2fetch.find(key) != items2fetch.end()) {
-                continue;
-            }
             VBucketBGFetchItem *fit = new VBucketBGFetchItem(NULL, false);
             vb_bgfetch_item_ctx_t& bg_itm_ctx = items2fetch[key];
             bg_itm_ctx.isMetaOnly = false;