1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 * Copyright 2016 Couchbase, Inc
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
21 #include "storeddockey.h"
22 #include "stored-value.h"
23 #include <platform/non_negative_counter.h>
25 class AbstractStoredValueFactory;
26 class HashTableStatVisitor;
27 class HashTableVisitor;
28 class HashTableDepthVisitor;
29 class PauseResumeHashTableVisitor;
32 * Mutation types as returned by store commands.
34 enum class MutationStatus : uint16_t {
35 NotFound, //!< The item was not found for update
36 InvalidCas, //!< The wrong CAS identifier was sent for a CAS update
37 WasClean, //!< The item was clean before this mutation
38 WasDirty, //!< This item was already dirty before this mutation
39 IsLocked, //!< The item is locked and can't be updated.
40 NoMem, //!< Insufficient memory to store this item.
41 NeedBgFetch //!< Require a bg fetch to process SET op
45 * Result from add operation.
47 enum class AddStatus : uint16_t {
48 Success, //!< Add was successful.
49 NoMem, //!< No memory for operation
50 Exists, //!< Did not update -- item exists with this key
51 UnDel, //!< Undeletes an existing dirty item
52 AddTmpAndBgFetch, //!< Create a tmp item and schedule a bg metadata fetch
53 BgFetch //!< Schedule a bg metadata fetch to process ADD op
57 * A container of StoredValue instances.
59 * The HashTable class is an unordered, associative array which maps
60 * StoredDocKeys to StoredValue.
62 * It supports a limited degree of concurrent access - the underlying
63 * HashTable buckets are guarded by N ht_locks; where N is typically of the
64 * order of the number of CPUs. Essentially ht bucket B is guarded by
67 * StoredValue objects can have their value (Blob object) ejected, making the
68 * value non-resident. Such StoredValues are still in the HashTable, and their
69 * metadata (CAS, revSeqno, bySeqno, etc) is still accessible, but the value
70 * cannot be accessed until it is fetched from disk. This is value eviction.
72 * Additionally, we can eject the whole StoredValue from the HashTable.
73 * We no longer know if the item even exists from the HashTable, and need to go
74 * to disk to definitively know if it exists or not. Note that even though the
75 * HashTable has no direct knowledge of the item now, it /does/ track the total
76 * number of items which exist but are not resident (see numNonResidentItems).
77 * This is full eviction.
79 * The HashTable can hold StoredValues which are either 'valid' (i.e. represent
80 * the current state of that key), or which are logically deleted. Typically
81 * StoredValues which are deleted are only held in the HashTable for a short
82 * period of time - until the deletion is recorded on disk by the Flusher, at
83 * which point they are removed from the HashTable by PersistenceCallback (we
84 * don't want to unnecessarily spend memory on items which have been deleted).
90 * Represents a position within the hashtable.
92 * Currently opaque (and constant), clients can pass them around but
93 * cannot reposition the iterator.
97 // Allow default construction positioned at the start,
99 Position() : ht_size(0), lock(0), hash_bucket(0) {}
101 bool operator==(const Position& other) const {
102 return (ht_size == other.ht_size) &&
103 (lock == other.lock) &&
104 (hash_bucket == other.hash_bucket);
107 bool operator!=(const Position& other) const {
108 return ! (*this == other);
112 Position(size_t ht_size_, int lock_, int hash_bucket_)
115 hash_bucket(hash_bucket_) {}
117 // Size of the hashtable when the position was created.
119 // Lock ID we are up to.
121 // hash bucket ID (under the given lock) we are up to.
124 friend class HashTable;
125 friend std::ostream& operator<<(std::ostream& os, const Position& pos);
129 * Represents a locked hash bucket that provides RAII semantics for the lock
131 * A simple container which holds a lock and the bucket_num of the
132 * hashtable bucket it has the lock for.
134 class HashBucketLock {
139 HashBucketLock(int bucketNum, std::mutex& mutex)
140 : bucketNum(bucketNum), htLock(mutex) {
143 HashBucketLock(HashBucketLock&& other)
144 : bucketNum(other.bucketNum), htLock(std::move(other.htLock)) {
147 HashBucketLock(const HashBucketLock& other) = delete;
149 int getBucketNum() const {
153 const std::unique_lock<std::mutex>& getHTLock() const {
157 std::unique_lock<std::mutex>& getHTLock() {
163 std::unique_lock<std::mutex> htLock;
167 * Create a HashTable.
169 * @param st the global stats reference
170 * @param svFactory Factory to use for constructing stored values
171 * @param s the number of hash table buckets
172 * @param l the number of locks in the hash table
174 HashTable(EPStats& st,
175 std::unique_ptr<AbstractStoredValueFactory> svFactory,
181 size_t memorySize() {
182 return sizeof(HashTable)
183 + (size * sizeof(StoredValue*))
184 + (n_locks * sizeof(std::mutex));
188 * Get the number of hash table buckets this hash table has.
190 size_t getSize(void) { return size; }
193 * Get the number of locks in this hash table.
195 size_t getNumLocks(void) { return n_locks; }
198 * Get the number of in-memory non-resident and resident items within
201 size_t getNumInMemoryItems() const {
206 * Get the number of deleted items in the hash table.
208 size_t getNumDeletedItems() const {
209 return numDeletedItems;
213 * Get the number of in-memory non-resident items within this hash table.
215 size_t getNumInMemoryNonResItems() const { return numNonResidentItems; }
218 * Get the number of non-resident and resident items managed by
219 * this hash table. Includes items marked as deleted.
220 * Note that this will be equal to numItems if
221 * VALUE_ONLY_EVICTION is chosen as a cache management.
223 size_t getNumItems(void) const {
224 return numTotalItems;
227 void setNumTotalItems(size_t totalItems) {
228 numTotalItems = totalItems;
231 void decrNumItems() {
235 void decrNumTotalItems() {
239 void decrNumNonResidentItems() {
240 --numNonResidentItems;
244 * Get the number of items whose values are ejected from this hash table.
246 size_t getNumEjects(void) { return numEjects; }
249 * Get the total item memory size in this hash table.
251 size_t getItemMemory(void) { return memSize; }
254 * Clear the hash table.
256 * @param deactivate true when this hash table is being destroyed completely
258 void clear(bool deactivate = false);
261 * Get the number of times this hash table has been resized.
263 size_t getNumResizes() { return numResizes; }
266 * Get the number of temp. items within this hash table.
268 size_t getNumTempItems(void) { return numTempItems; }
271 * Automatically resize to fit the current data.
276 * Resize to the specified size.
278 void resize(size_t to);
281 * Find the item with the given key.
283 * @param key the key to find
284 * @param trackReference whether to track the reference or not
285 * @param wantsDeleted whether a deleted value needs to be returned
287 * @return a pointer to a StoredValue -- NULL if not found
289 StoredValue* find(const DocKey& key,
290 TrackReference trackReference,
291 WantsDeleted wantsDeleted);
294 * Find a resident item
296 * @param rnd a randomization input
297 * @return an item -- NULL if not fount
299 std::unique_ptr<Item> getRandomKey(long rnd);
302 * Set an Item into the this hashtable
304 * @param val the Item to store
306 * @return a result indicating the status of the store
308 MutationStatus set(Item& val);
311 * Updates an existing StoredValue in the HT.
312 * Assumes that HT bucket lock is grabbed.
314 * @param htLock Hash table lock that must be held.
315 * @param v Reference to the StoredValue to be updated.
316 * @param itm Item to be updated.
318 * @return Result indicating the status of the operation
320 MutationStatus unlocked_updateStoredValue(
321 const std::unique_lock<std::mutex>& htLock,
326 * Adds a new StoredValue in the HT.
327 * Assumes that HT bucket lock is grabbed.
329 * @param hbl Hash table bucket lock that must be held.
330 * @param itm Item to be added.
332 * @return Ptr of the StoredValue added. This function always succeeds and
333 * returns non-null ptr
335 StoredValue* unlocked_addNewStoredValue(const HashBucketLock& hbl,
339 * Replaces a StoredValue in the HT with its copy, and releases the
340 * ownership of the StoredValue.
341 * We need this when we want to update (or soft delete) the StoredValue
342 * without an item for the update (or soft delete) and still keep around
344 * Assumes that HT bucket lock is grabbed.
346 * @param hbl Hash table bucket lock that must be held.
347 * @param vToCopy StoredValue to be replaced by its copy.
349 * @return Ptr of the copy of the StoredValue added. This is owned by the
351 * UniquePtr to the StoredValue replaced by its copy. This is NOT
352 * owned by the hash table anymore.
354 std::pair<StoredValue*, StoredValue::UniquePtr> unlocked_replaceByCopy(
355 const HashBucketLock& hbl, const StoredValue& vToCopy);
357 * Logically (soft) delete the item in ht
358 * Assumes that HT bucket lock is grabbed.
359 * Also assumes that v is in the hash table.
361 * @param htLock Hash table lock that must be held
362 * @param v Reference to the StoredValue to be soft deleted
363 * @param onlyMarkDeleted indicates if we must reset the StoredValue or
366 void unlocked_softDelete(const std::unique_lock<std::mutex>& htLock,
368 bool onlyMarkDeleted);
371 * Find an item within a specific bucket assuming you already
374 * @param key the key of the item to find
375 * @param bucket_num the bucket number
376 * @param wantsDeleted true if soft deleted items should be returned
378 * @return a pointer to a StoredValue -- NULL if not found
380 StoredValue* unlocked_find(const DocKey& key,
382 WantsDeleted wantsDeleted,
383 TrackReference trackReference);
386 * Get a lock holder holding a lock for the given bucket
388 * @param bucket the bucket number to lock
389 * @return HashBucektLock which contains a lock and the hash bucket number
391 inline HashBucketLock getLockedBucket(int bucket) {
392 return HashBucketLock(bucket, mutexes[mutexForBucket(bucket)]);
396 * Get a lock holder holding a lock for the bucket for the given
399 * @param h the input hash
400 * @return HashBucketLock which contains a lock and the hash bucket number
402 inline HashBucketLock getLockedBucketForHash(int h) {
405 throw std::logic_error("HashTable::getLockedBucket: "
406 "Cannot call on a non-active object");
408 int bucket = getBucketForHash(h);
409 HashBucketLock rv(bucket, mutexes[mutexForBucket(bucket)]);
410 if (bucket == getBucketForHash(h)) {
417 * Get a lock holder holding a lock for the bucket for the hash of
421 * @return HashBucektLock which contains a lock and the hash bucket number
423 inline HashBucketLock getLockedBucket(const DocKey& key) {
425 throw std::logic_error("HashTable::getLockedBucket: Cannot call on a "
426 "non-active object");
428 return getLockedBucketForHash(key.hash());
432 * Delete a key from the cache without trying to lock the cache first
433 * (Please note that you <b>MUST</b> acquire the mutex before calling
436 * @param hbl HashBucketLock that must be held
437 * @param key the key to delete
439 void unlocked_del(const HashBucketLock& hbl, const DocKey& key);
442 * Visit all items within this hashtable.
444 void visit(HashTableVisitor &visitor);
447 * Visit all items within this call with a depth visitor.
449 void visitDepth(HashTableDepthVisitor &visitor);
452 * Visit the items in this hashtable, starting the iteration from the
453 * given startPosition and allowing the visit to be paused at any point.
455 * During visitation, the visitor object can request that the visit
456 * is stopped after the current item. The position passed to the
457 * visitor can then be used to restart visiting at the *APPROXIMATE*
458 * same position as it paused.
459 * This is approximate as hashtable locks are released when the
460 * function returns, so any changes to the hashtable may cause the
461 * visiting to restart at the slightly different place.
463 * As a consequence, *DO NOT USE THIS METHOD* if you need to guarantee
464 * that all items are visited!
466 * @param visitor The visitor object to use.
467 * @param start_pos At what position to start in the hashtable.
468 * @return The final HashTable position visited; equal to
469 * HashTable::end() if all items were visited otherwise the
470 * position to resume from.
472 Position pauseResumeVisit(PauseResumeHashTableVisitor& visitor,
473 Position& start_pos);
476 * Return a position at the end of the hashtable. Has similar semantics
477 * as STL end() (i.e. one past the last element).
479 Position endPosition() const;
482 * Get the number of buckets that should be used for initialization.
484 * @param s if 0, return the default number of buckets, else return s
486 static size_t getNumBuckets(size_t s = 0);
489 * Get the number of locks that should be used for initialization.
491 * @param s if 0, return the default number of locks, else return s
493 static size_t getNumLocks(size_t s);
496 * Set the default number of buckets.
498 static void setDefaultNumBuckets(size_t);
501 * Set the default number of locks.
503 static void setDefaultNumLocks(size_t);
506 * Get the max deleted revision seqno seen so far.
508 uint64_t getMaxDeletedRevSeqno() const {
509 return maxDeletedRevSeqno.load();
513 * Set the max deleted seqno (required during warmup).
515 void setMaxDeletedRevSeqno(const uint64_t seqno) {
516 maxDeletedRevSeqno.store(seqno);
520 * Update maxDeletedRevSeqno to a (possibly) new value.
522 void updateMaxDeletedRevSeqno(const uint64_t seqno) {
523 atomic_setIfBigger(maxDeletedRevSeqno, seqno);
527 * Eject an item meta data and value from memory.
528 * @param vptr the reference to the pointer to the StoredValue instance.
529 * This is passed as a reference as it may be modified by this
530 * function (see note below).
531 * @param policy item eviction policy
532 * @return true if an item is ejected.
534 * NOTE: Upon a successful ejection (and if full eviction is enabled)
535 * the StoredValue will be deleted, therefore it is *not* safe to
536 * access vptr after calling this function if it returned true.
538 bool unlocked_ejectItem(StoredValue*& vptr, item_eviction_policy_t policy);
541 * Restore the value for the item.
542 * Assumes that HT bucket lock is grabbed.
544 * @param htLock Hash table lock that must be held
545 * @param itm the item to be restored
546 * @param v corresponding StoredValue
548 * @return true if restored; else false
550 bool unlocked_restoreValue(const std::unique_lock<std::mutex>& htLock,
555 * Restore the metadata of of a temporary item upon completion of a
557 * Assumes that HT bucket lock is grabbed.
559 * @param htLock Hash table lock that must be held
560 * @param itm the Item whose metadata is being restored
561 * @param v corresponding StoredValue
563 void unlocked_restoreMeta(const std::unique_lock<std::mutex>& htLock,
568 * Releases an item(StoredValue) in the hash table, but does not delete it.
569 * It will pass out the removed item to the caller who can decide whether to
571 * Once removed the item will not be found if we look in the HT later, even
572 * if the item is not deleted. Hence the deletion of this
573 * removed StoredValue must be handled by another (maybe calling) module.
574 * Assumes that the hash bucket lock is already held.
576 * @param hbl HashBucketLock that must be held
577 * @param key the key to delete
579 * @return the StoredValue that is removed from the HT
581 StoredValue::UniquePtr unlocked_release(const HashBucketLock& hbl,
584 std::atomic<uint64_t> maxDeletedRevSeqno;
585 std::atomic<size_t> numTotalItems;
586 cb::NonNegativeCounter<size_t> numNonResidentItems;
587 cb::NonNegativeCounter<size_t> numDeletedItems;
588 std::array<cb::NonNegativeCounter<size_t>, mcbp::datatype::highest + 1>
590 std::atomic<size_t> numEjects;
591 //! Memory consumed by items in this hashtable.
592 std::atomic<size_t> memSize;
594 std::atomic<size_t> cacheSize;
596 std::atomic<size_t> metaDataMemory;
599 // The container for actually holding the StoredValues.
600 using table_type = std::vector<StoredValue::UniquePtr>;
602 friend class StoredValue;
603 friend std::ostream& operator<<(std::ostream& os, const HashTable& ht);
605 inline bool isActive() const { return activeState; }
606 inline void setActiveState(bool newv) { activeState = newv; }
608 std::atomic<size_t> size;
613 std::unique_ptr<AbstractStoredValueFactory> valFact;
614 std::atomic<size_t> visitors;
615 cb::NonNegativeCounter<size_t> numItems;
616 std::atomic<size_t> numResizes;
617 std::atomic<size_t> numTempItems;
620 static size_t defaultNumBuckets;
621 static size_t defaultNumLocks;
623 int getBucketForHash(int h) {
624 return abs(h % static_cast<int>(size));
627 inline size_t mutexForBucket(size_t bucket_num) {
629 throw std::logic_error("HashTable::mutexForBucket: Cannot call on a "
630 "non-active object");
632 return bucket_num % n_locks;
635 std::unique_ptr<Item> getRandomKeyFromSlot(int slot);
637 /** Searches for the first element in the specified hashChain which matches
638 * predicate p, and unlinks it from the chain.
640 * @param chain Linked list of StoredValues to scan.
641 * @param p Predicate to test each element against.
642 * The signature of the predicate function should be equivalent
644 * bool pred(const StoredValue* a);
646 * @return The removed element, or NULL if no matching element was found.
648 template <typename Pred>
649 StoredValue::UniquePtr hashChainRemoveFirst(StoredValue::UniquePtr& chain,
651 if (p(chain.get())) {
653 auto removed = std::move(chain);
654 chain = std::move(removed->next);
658 // Not head element, start searching.
659 for (StoredValue::UniquePtr* curr = &chain; curr->get()->next;
660 curr = &curr->get()->next) {
661 if (p(curr->get()->next.get())) {
662 // next element matches predicate - splice it out of the list.
663 auto removed = std::move(curr->get()->next);
664 curr->get()->next = std::move(removed->next);
673 DISALLOW_COPY_AND_ASSIGN(HashTable);
676 std::ostream& operator<<(std::ostream& os, const HashTable& ht);
679 * Base class for visiting a hash table.
681 class HashTableVisitor {
683 virtual ~HashTableVisitor() {}
686 * Visit an individual item within a hash table. Note that the item is
687 * locked while visited (the appropriate hashTable lock is held).
689 * @param v a pointer to a value in the hash table
691 virtual void visit(const HashTable::HashBucketLock& lh, StoredValue* v) = 0;
694 * True if the visiting should continue.
696 * This is called periodically to ensure the visitor still wants
699 virtual bool shouldContinue() { return true; }
703 * Base class for visiting a hash table with pause/resume support.
705 class PauseResumeHashTableVisitor {
708 * Visit an individual item within a hash table.
710 * @param v a pointer to a value in the hash table.
711 * @return True if visiting should continue, otherwise false.
713 virtual bool visit(StoredValue& v) = 0;
717 * Hash table visitor that reports the depth of each hashtable bucket.
719 class HashTableDepthVisitor {
721 virtual ~HashTableDepthVisitor() {}
724 * Called once for each hashtable bucket with its depth.
726 * @param bucket the index of the hashtable bucket
727 * @param depth the number of entries in this hashtable bucket
728 * @param mem counted memory used by this hash table
730 virtual void visit(int bucket, int depth, size_t mem) = 0;
734 * Hash table visitor that finds the min and max bucket depths.
736 class HashTableDepthStatVisitor : public HashTableDepthVisitor {
739 HashTableDepthStatVisitor()
740 : depthHisto(GrowingWidthGenerator<unsigned int>(1, 1, 1.3), 10),
746 void visit(int bucket, int depth, size_t mem) {
748 // -1 is a special case for min. If there's a value other than
749 // -1, we prefer that.
750 min = std::min(min == -1 ? depth : min, depth);
751 max = std::max(max, depth);
752 depthHisto.add(depth);
757 Histogram<unsigned int> depthHisto;
765 * Track the current number of hashtable visitors.
767 * This class is a pretty generic counter holder that increments on
768 * entry and decrements on return providing RAII guarantees around an
771 class VisitorTracker {
775 * Mark a visitor as visiting.
777 * @param c the counter that should be incremented (and later
780 explicit VisitorTracker(std::atomic<size_t> *c) : counter(c) {
781 counter->fetch_add(1);
784 counter->fetch_sub(1);
787 std::atomic<size_t> *counter;