MB-20623: Remove unused fields from temporary warmup data structures 42/68042/7
authorDave Rigby <daver@couchbase.com>
Mon, 22 Aug 2016 14:43:29 +0000 (15:43 +0100)
committerDave Rigby <daver@couchbase.com>
Fri, 30 Sep 2016 15:06:56 +0000 (15:06 +0000)
Warmup currently creates three data structures (per vBucket) in memory
when warming up using the access.log:

1. A map of keys -> sequence number at the time the access log was
   generated - MutationLogHarvester::committed

2. A list of {key, hashTable_seqNo} pairs -
   MutationLogHarvester::apply::fetches

3. A map of key -> VBucketBGFetchItem for every key in the list above
- batchWarmupCallback::items2fetch

There are a number of inefficiencies in this implementation, the first
of which is that we record sequence numbers in the first two data
structures which are never actually used - the final BGfetch doesn't
need them.

This patch therefore removes the recording of sequence numbers. This
changes the data structures to:

1. MutationLogHarvester::committed - a set of keys (per vBucket).
2. MutationLogHarvester::apply::fetches - a vector of keys.

Results in a reduction in the memory used by these temporary warmup
data structures - from 4356MB to 3876MB (11%) for the following
workload:

* 1 bucket, 10,000MB quota, 1 node.
* 30M items, 300bytes each. Dataset generated by:
    cbc-pillowfight -U couchbase://localhost:12000 -I 30000000 -m 300 -M 300 -t16

Change-Id: I0666e2d1a8a0d9e996cdbdd61d41118d2c2d6dfc
Reviewed-on: http://review.couchbase.org/68042
Reviewed-by: Daniel Owen <owend@couchbase.com>
Tested-by: buildbot <build@couchbase.com>
src/mutation_log.cc
src/mutation_log.h
src/warmup.cc

index a126426..1a2d794 100644 (file)
@@ -869,36 +869,24 @@ void MutationLog::resetCounts(size_t *items) {
 bool MutationLogHarvester::load() {
     bool clean(false);
     std::set<uint16_t> shouldClear;
-    for (MutationLog::iterator it(mlog.begin()); it != mlog.end(); ++it) {
-        const MutationLogEntry *le = *it;
+    for (const MutationLogEntry* le : mlog) {
         ++itemsSeen[le->type()];
         clean = false;
 
         switch (le->type()) {
         case ML_NEW:
             if (vbid_set.find(le->vbucket()) != vbid_set.end()) {
-                loading[le->vbucket()][le->key()] =
-                                      std::make_pair(le->rowid(), le->type());
+                loading[le->vbucket()].emplace(le->key());
             }
             break;
-        case ML_COMMIT2: {
+        case ML_COMMIT2:
             clean = true;
 
             for (const uint16_t vb : vbid_set) {
-                for (auto& copyit2 : loading[vb]) {
-
-                    mutation_log_event_t t = copyit2.second;
-
-                    switch (t.second) {
-                    case ML_NEW:
-                        committed[vb][copyit2.first] = t.first;
-                        break;
-                    default:
-                        abort();
-                    }
+                for (auto& item : loading[vb]) {
+                    committed[vb].emplace(item);
                 }
             }
-        }
             loading.clear();
             break;
         case ML_COMMIT1:
@@ -913,8 +901,7 @@ bool MutationLogHarvester::load() {
 
 void MutationLogHarvester::apply(void *arg, mlCallback mlc) {
     for (const uint16_t vb : vbid_set) {
-        for (const auto& it2 : committed[vb]) {
-            const std::string& key = it2.first;
+        for (const auto& key : committed[vb]) {
             if (!mlc(arg, vb, key)) { // Stop loading from an access log
                 return;
             }
@@ -927,20 +914,17 @@ void MutationLogHarvester::apply(void *arg, mlCallbackWithQueue mlc) {
         throw std::logic_error("MutationLogHarvester::apply: Cannot apply "
                 "when engine is NULL");
     }
-    std::vector<std::pair<std::string, uint64_t> > fetches;
+    std::vector<std::string> fetches;
     for (const uint16_t vb : vbid_set) {
-
         RCPtr<VBucket> vbucket = engine->getEpStore()->getVBucket(vb);
         if (!vbucket) {
             continue;
         }
-
-        for (const auto& it2 : committed[vb]) {
-            // cannot use rowid from access log, so must read from hashtable
-            const std::string& key = it2.first;
-            StoredValue *v = NULL;
-            if ((v = vbucket->ht.find(key, false))) {
-                fetches.push_back(std::make_pair(it2.first, v->getBySeqno()));
+        for (const auto& key : committed[vb]) {
+            // Check item is a valid StoredValue in the HashTable before
+            // adding to fetches
+            if ((vbucket->ht.find(key, false) != nullptr)) {
+                fetches.push_back(key);
             }
         }
         if (!mlc(vb, fetches, arg)) {
index 6c36ee3..628fd7a 100644 (file)
@@ -568,8 +568,8 @@ typedef std::pair<uint64_t, uint8_t> mutation_log_event_t;
  */
 typedef bool (*mlCallback)(void*, uint16_t, const std::string &);
 typedef bool (*mlCallbackWithQueue)(uint16_t,
-                    std::vector<std::pair<std::string, uint64_t> > &,
-                    void *arg);
+                                    std::vector<std::string> &,
+                                    void *arg);
 
 /**
  * Type for mutation log leftovers.
@@ -632,9 +632,7 @@ private:
     EventuallyPersistentEngine *engine;
     std::set<uint16_t> vbid_set;
 
-    std::unordered_map<uint16_t,
-                       std::unordered_map<std::string, uint64_t> > committed;
-    std::unordered_map<uint16_t,
-                       std::unordered_map<std::string, mutation_log_event_t> > loading;
+    std::unordered_map<uint16_t, std::set<std::string>> committed;
+    std::unordered_map<uint16_t, std::set<std::string>> loading;
     size_t itemsSeen[MUTATION_LOG_TYPES];
 };
index c823afe..f5bbbeb 100644 (file)
@@ -45,23 +45,20 @@ struct WarmupCookie {
 };
 
 static bool batchWarmupCallback(uint16_t vbId,
-                                std::vector<std::pair<std::string,
-                                uint64_t> > &fetches,
+                                std::vector<std::string>& fetches,
                                 void *arg)
 {
     WarmupCookie *c = static_cast<WarmupCookie *>(arg);
 
     if (!c->epstore->maybeEnableTraffic()) {
         vb_bgfetch_queue_t items2fetch;
-        std::vector<std::pair<std::string, uint64_t> >::iterator itm =
-                                                              fetches.begin();
-        for (; itm != fetches.end(); itm++) {
+        for (auto& key : fetches) {
             // ignore duplicate keys, if any in access log
-            if (items2fetch.find((*itm).first) != items2fetch.end()) {
+            if (items2fetch.find(key) != items2fetch.end()) {
                 continue;
             }
             VBucketBGFetchItem *fit = new VBucketBGFetchItem(NULL, false);
-            vb_bgfetch_item_ctx_t& bg_itm_ctx = items2fetch[(*itm).first];
+            vb_bgfetch_item_ctx_t& bg_itm_ctx = items2fetch[key];
             bg_itm_ctx.isMetaOnly = false;
             bg_itm_ctx.bgfetched_list.push_back(fit);
         }