1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 * Copyright 2016 Couchbase, Inc
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "hash_table.h"
20 #include "stored_value_factories.h"
// Given we default to 1024 vBuckets, set default HT buckets to lowest
// prime in our table - this still gives space for 3072 HT slots but
// minimizes fixed overheads.
size_t HashTable::defaultNumBuckets = 3;
// Number of mutexes striped across the hash buckets (each mutex guards
// every defaultNumLocks'th bucket); 193 is a prime from the size table.
size_t HashTable::defaultNumLocks = 193;
// Candidate hash-table sizes (all prime), scanned in ascending order by
// resize() to pick the size nearest the current in-memory item count.
// NOTE(review): resize() tests for a -1 entry, so the table is expected to
// be terminated with a -1 sentinel - confirm against the full initializer.
static ssize_t prime_size_table[] = {
    3, 7, 13, 23, 47, 97, 193, 383, 769, 1531, 3079, 6143, 12289, 24571, 49157,
    98299, 196613, 393209, 786433, 1572869, 3145721, 6291449, 12582917,
    25165813, 50331653, 100663291, 201326611, 402653189, 805306357,
// Format a HashTable iteration Position for debug/stats output as
// "{lock:<lock> bucket:<bucket>/<table size>}".
std::ostream& operator<<(std::ostream& os, const HashTable::Position& pos) {
    os << "{lock:" << pos.lock << " bucket:" << pos.hash_bucket << "/" << pos.ht_size << "}";
// Construct a HashTable.
// st        - engine-wide stats object used for memory accounting.
// svFactory - factory producing StoredValues; ownership is transferred.
// A requested bucket count / lock count of zero selects the class defaults
// (see getNumBuckets / getNumLocks).
HashTable::HashTable(EPStats& st,
                     std::unique_ptr<AbstractStoredValueFactory> svFactory,
    : maxDeletedRevSeqno(0),
      numNonResidentItems(0),
      valFact(std::move(svFactory)),
    // Resolve the requested bucket / lock counts (zero => defaults).
    size = HashTable::getNumBuckets(s);
    n_locks = HashTable::getNumLocks(l);
    // One mutex per lock 'stripe'; each stripe guards every n_locks'th bucket.
    mutexes = new std::mutex[n_locks];
// Destructor: clears contents without acquiring the bucket mutexes (no
// concurrent access is legal during destruction), after waiting for any
// in-flight visitors to drain.
HashTable::~HashTable() {
    // Use unlocked clear for the destructor, avoids lock inversions on VBucket
    // Wait for any outstanding visitors to finish.
    while (visitors > 0) {
// Remove every item from the table. If `deactivate` is true the table is
// also deactivated; otherwise the table must already be active (throws
// std::logic_error if not). Takes all stripe mutexes for the duration.
void HashTable::clear(bool deactivate) {
    // If not deactivating, assert we're already active.
            throw std::logic_error("HashTable::clear: Cannot call on a "
    // Hold every stripe mutex so no reader/writer can see a half-cleared table.
    MultiLockHolder mlh(mutexes, n_locks);
    clear_UNLOCKED(deactivate);
// Clear implementation; the caller must already hold ALL stripe mutexes.
// Unlinks and destroys every StoredValue, accumulating the freed sizes so
// the stats counters can be adjusted once at the end.
void HashTable::clear_UNLOCKED(bool deactivate) {
        setActiveState(false);
    size_t clearedMemSize = 0;
    size_t clearedValSize = 0;
    for (int i = 0; i < (int)size; i++) {
            // Take ownership of the StoredValue from the vector, update
            // statistics and release it.
            auto v = std::move(values[i]);
            clearedMemSize += v->size();
            clearedValSize += v->valuelen();
            values[i] = std::move(v->next);
    // Overhead (total size minus value payload) comes off currentSize.
    stats.currentSize.fetch_sub(clearedMemSize - clearedValSize);
    // Reset all aggregate counters - the table is now empty.
    datatypeCounts.fill(0);
    numTotalItems.store(0);
    numTempItems.store(0);
    numNonResidentItems.store(0);
/**
 * Absolute difference between two unsigned values.
 */
static size_t distance(size_t a, size_t b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
127 static size_t nearest(size_t n, size_t a, size_t b) {
128 return (distance(n, a) < distance(b, n)) ? a : b;
/**
 * Is `size` equal to either of the candidate table sizes `a` or `b`?
 */
static bool isCurrently(size_t size, ssize_t a, ssize_t b) {
    const ssize_t current = static_cast<ssize_t>(size);
    return (current == a) || (current == b);
}
// Automatic resize: pick the prime size from prime_size_table that best
// matches the current number of in-memory items, then delegate to
// resize(size_t) to perform the rehash.
void HashTable::resize() {
    size_t ni = getNumInMemoryItems();
    // Figure out where in the prime table we are.
    ssize_t target(static_cast<ssize_t>(ni));
    for (i = 0; prime_size_table[i] > 0 && prime_size_table[i] < target; ++i) {
    if (prime_size_table[i] == -1) {
        // We're at the end, take the biggest
        new_size = prime_size_table[i-1];
    } else if (prime_size_table[i] < static_cast<ssize_t>(defaultNumBuckets)) {
        // Was going to be smaller than the configured ht_size.
        new_size = defaultNumBuckets;
        new_size = prime_size_table[i];
    }else if (isCurrently(size, prime_size_table[i-1], prime_size_table[i])) {
        // If one of the candidate sizes is the current size, maintain
        // the current size in order to remain stable.
        // Somewhere in the middle, use the one we're closer to.
        new_size = nearest(ni, prime_size_table[i-1], prime_size_table[i]);
// Resize the table to `newSize` buckets, rehashing every StoredValue into
// its new chain. Requires the table to be active (throws std::logic_error
// otherwise); returns early when the size is out of range, unchanged, or
// visitors are currently registered.
void HashTable::resize(size_t newSize) {
        throw std::logic_error("HashTable::resize: Cannot call on a "
                "non-active object");
    // Due to the way hashing works, we can't fit anything larger than
    if (newSize > static_cast<size_t>(std::numeric_limits<int>::max())) {
    // Don't resize to the same size, either.
    if (newSize == size) {
    // Own every stripe mutex so no bucket chain can change underneath us.
    MultiLockHolder mlh(mutexes, n_locks);
    if (visitors.load() > 0) {
        // Do not allow a resize while any visitors are actually
        // processing. The next attempt will have to pick it up. New
        // visitors cannot start doing meaningful work (we own all
        // locks at this point).
    // Get a place for the new items.
    table_type newValues(newSize);
    // Remove the old table's overhead; re-added for the new table below.
    stats.memOverhead->fetch_sub(memorySize());
    // Set the new size so all the hashy stuff works.
    size_t oldSize = size;
    // Move existing records into the new space.
    for (size_t i = 0; i < oldSize; i++) {
            // unlink the front element from the hash chain at values[i].
            auto v = std::move(values[i]);
            values[i] = std::move(v->next);
            // And re-link it into the correct place in newValues.
            int newBucket = getBucketForHash(v->getKey().hash());
            v->next = std::move(newValues[newBucket]);
            newValues[newBucket] = std::move(v);
    // Finally assign the new table to values.
    values = std::move(newValues);
    stats.memOverhead->fetch_add(memorySize());
// Look up `key` in the table. Throws std::logic_error if the table is not
// active. Acquires the key's bucket lock and delegates to unlocked_find();
// returns nullptr when the key is absent (or deleted and not wanted).
StoredValue* HashTable::find(const DocKey& key,
                             TrackReference trackReference,
                             WantsDeleted wantsDeleted) {
        throw std::logic_error("HashTable::find: Cannot call on a "
                "non-active object");
    HashBucketLock hbl = getLockedBucket(key);
    return unlocked_find(key, hbl.getBucketNum(), wantsDeleted, trackReference);
// Return a copy (as an Item) of some resident key, starting the scan at a
// pseudo-random slot (rnd % size) and wrapping until a candidate is found
// or the scan returns to its starting slot.
std::unique_ptr<Item> HashTable::getRandomKey(long rnd) {
    /* Try to locate a partition */
    size_t start = rnd % size;
    std::unique_ptr<Item> ret;
        // Probe successive slots, wrapping around the table.
        ret = getRandomKeyFromSlot(curr++);
    } while (ret == NULL && curr != start);
// Insert or update `val`. Returns NoMem when memory checks fail; otherwise
// updates an existing StoredValue in place (WasDirty/WasClean per its prior
// state) or inserts a new one (WasClean).
MutationStatus HashTable::set(Item& val) {
    if (!StoredValue::hasAvailableSpace(stats, val, false)) {
        return MutationStatus::NoMem;
    HashBucketLock hbl = getLockedBucket(val.getKey());
    StoredValue* v = unlocked_find(val.getKey(),
        return unlocked_updateStoredValue(hbl.getHTLock(), *v, val);
        unlocked_addNewStoredValue(hbl, val);
        return MutationStatus::WasClean;
// Update existing StoredValue `v` in place with the contents of `itm`.
// Caller must hold the bucket lock covering v (htLock); throws if the lock
// is not held or the table is inactive. Returns WasDirty/WasClean
// reflecting v's dirtiness *before* the update.
MutationStatus HashTable::unlocked_updateStoredValue(
        const std::unique_lock<std::mutex>& htLock,
        throw std::invalid_argument(
                "HashTable::unlocked_updateStoredValue: htLock "
        throw std::logic_error(
                "HashTable::unlocked_updateStoredValue: Cannot "
                "call on a non-active HT object");
    MutationStatus status =
            v.isDirty() ? MutationStatus::WasDirty : MutationStatus::WasClean;
    // The old value was non-resident; after this update it will hold the new
    // value, so it stops counting toward the non-resident total.
    if (!v.isResident() && !v.isDeleted() && !v.isTempItem()) {
        decrNumNonResidentItems();
    if (v.isTempItem()) {
    if (v.isDeleted() && !itm.isDeleted()) {
    if (!v.isDeleted() && itm.isDeleted()) {
    // If the item we are replacing is resident then we need to make sure we
    // appropriately alter the datatype stats.
    if (v.getDatatype() != itm.getDataType()) {
        --datatypeCounts[v.getDatatype()];
        ++datatypeCounts[itm.getDataType()];
    /* setValue() will mark v as undeleted if required */
    v.setValue(itm, *this);
// Create a new StoredValue for `itm` and link it at the head of its bucket
// chain. Caller must hold hbl (throws if the lock is not held or the table
// is inactive). Returns a pointer to the inserted value (owned by the
// table).
StoredValue* HashTable::unlocked_addNewStoredValue(const HashBucketLock& hbl,
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_addNewStoredValue: htLock "
        throw std::invalid_argument(
                "HashTable::unlocked_addNewStoredValue: Cannot "
                "call on a non-active HT object");
    // Create a new StoredValue and link it into the head of the bucket chain.
    auto v = (*valFact)(itm, std::move(values[hbl.getBucketNum()]), *this);
    if (v->isTempItem()) {
    ++datatypeCounts[v->getDatatype()];
    if (v->isDeleted()) {
    values[hbl.getBucketNum()] = std::move(v);
    return values[hbl.getBucketNum()].get();
// Replace `vToCopy` with a fresh copy of itself at the head of its bucket
// chain. Caller must hold hbl (throws otherwise, or if the table is
// inactive). Returns {pointer to the new copy (owned by the table),
// the released original StoredValue (ownership passes to the caller)}.
std::pair<StoredValue*, StoredValue::UniquePtr>
HashTable::unlocked_replaceByCopy(const HashBucketLock& hbl,
                                  const StoredValue& vToCopy) {
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_replaceByCopy: htLock "
        throw std::invalid_argument(
                "HashTable::unlocked_replaceByCopy: Cannot "
                "call on a non-active HT object");
    /* Release (remove) the StoredValue from the hash table */
    auto releasedSv = unlocked_release(hbl, vToCopy.getKey());
    /* Copy the StoredValue and link it into the head of the bucket chain. */
    auto newSv = valFact->copyStoredValue(
            vToCopy, std::move(values[hbl.getBucketNum()]), *this);
    if (newSv->isTempItem()) {
    values[hbl.getBucketNum()] = std::move(newSv);
    return {values[hbl.getBucketNum()].get(), std::move(releasedSv)};
// Logically delete `v` under the caller-held bucket lock (htLock).
// `onlyMarkDeleted` controls whether the value is merely flagged deleted or
// its payload is also removed. Adjusts residency/datatype statistics.
void HashTable::unlocked_softDelete(const std::unique_lock<std::mutex>& htLock,
                                    bool onlyMarkDeleted) {
    const bool alreadyDeleted = v.isDeleted();
    // A non-resident (not already deleted, non-temp) value being deleted no
    // longer counts toward the non-resident total.
    if (!v.isResident() && !v.isDeleted() && !v.isTempItem()) {
        decrNumNonResidentItems();
    --datatypeCounts[v.getDatatype()];
    if (onlyMarkDeleted) {
    if (v.isTempItem()) {
    if (!alreadyDeleted) {
// Walk the chain at `bucket_num` looking for `key`. Optionally records a
// reference on the found value (for eviction heuristics), and filters out
// deleted values unless the caller asked for them. Caller must hold the
// bucket lock. Returns nullptr when no matching value qualifies.
StoredValue* HashTable::unlocked_find(const DocKey& key,
                                      WantsDeleted wantsDeleted,
                                      TrackReference trackReference) {
    for (StoredValue* v = values[bucket_num].get(); v; v = v->next.get()) {
        if (v->hasKey(key)) {
            if (trackReference == TrackReference::Yes && !v->isDeleted()) {
            if (wantsDeleted == WantsDeleted::Yes || !v->isDeleted()) {
422 void HashTable::unlocked_del(const HashBucketLock& hbl, const DocKey& key) {
423 unlocked_release(hbl, key).reset();
// Unlink the StoredValue for `key` from its bucket chain and transfer its
// ownership to the caller. Caller must hold hbl; throws if the lock is not
// held, the table is inactive, or the key is not present. Updates cache /
// metadata / datatype statistics for the removed value.
// NOTE(review): the exception messages below say "unlocked_remove" and
// "unlocked_del" rather than "unlocked_release" - looks like stale
// copy/paste; confirm before relying on the message text.
StoredValue::UniquePtr HashTable::unlocked_release(
        const HashBucketLock& hbl, const DocKey& key) {
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_remove: htLock "
        throw std::logic_error(
                "HashTable::unlocked_remove: Cannot call on a "
                "non-active object");
    // Remove the first (should only be one) StoredValue with the given key.
    auto released = hashChainRemoveFirst(
            values[hbl.getBucketNum()],
            [key](const StoredValue* v) { return v->hasKey(key); });
    /* We shouldn't reach here, we must delete the StoredValue in the
        throw std::logic_error(
                "HashTable::unlocked_del: StoredValue to be deleted "
                "not found in HashTable; possibly HashTable leak");
    // Update statistics now the item has been removed.
    StoredValue::reduceCacheSize(*this, released->size());
    StoredValue::reduceMetaDataSize(*this, stats, released->metaDataSize());
    if (released->isTempItem()) {
    --datatypeCounts[released->getDatatype()];
    if (released->isDeleted()) {
// Apply `visitor` to every StoredValue in the table. Returns immediately on
// an empty or inactive table. Registers in {visitors} (under a mutex) so
// the resizer will not run concurrently; re-acquires the stripe mutex per
// bucket to limit impact on front-end threads.
void HashTable::visit(HashTableVisitor &visitor) {
    if ((numItems.load() + numTempItems.load()) == 0 || !isActive()) {
    // Acquire one (any) of the mutexes before incrementing {visitors}, this
    // prevents any race between this visitor and the HashTable resizer.
    // See comments in pauseResumeVisit() for further details.
    std::unique_lock<std::mutex> lh(mutexes[0]);
    VisitorTracker vt(&visitors);
    bool aborted = !visitor.shouldContinue();
    for (int l = 0; isActive() && !aborted && l < static_cast<int>(n_locks);
        for (int i = l; i < static_cast<int>(size); i+= n_locks) {
            // (re)acquire mutex on each HashBucket, to minimise any impact
            // on front-end threads.
            HashBucketLock lh(i, mutexes[l]);
            StoredValue* v = values[i].get();
                // TODO: Perf: This check seems costly - do we think it's still
                auto hashbucket = getBucketForHash(v->getKey().hash());
                if (i != hashbucket) {
                    // NOTE(review): "bucket is is located" below looks like a
                    // typo for "bucket it is located" (cf. visitDepth()).
                    throw std::logic_error("HashTable::visit: inconsistency "
                            "between StoredValue's calculated hashbucket "
                            "(which is " + std::to_string(hashbucket) +
                            ") and bucket is is located in (which is " +
                            std::to_string(i) + ")");
                // Capture next before visiting; the visitor may modify v.
                StoredValue* tmp = v->next.get();
                visitor.visit(lh, v);
        aborted = !visitor.shouldContinue();
// Visit each bucket chain once, reporting the per-bucket index, chain depth
// and memory to the visitor. Holds each stripe lock for the duration of its
// buckets; returns immediately on an empty or inactive table.
void HashTable::visitDepth(HashTableDepthVisitor &visitor) {
    if (numItems.load() == 0 || !isActive()) {
    VisitorTracker vt(&visitors);
    for (int l = 0; l < static_cast<int>(n_locks); l++) {
        LockHolder lh(mutexes[l]);
        for (int i = l; i < static_cast<int>(size); i+= n_locks) {
            StoredValue* p = values[i].get();
                // TODO: Perf: This check seems costly - do we think it's still
                auto hashbucket = getBucketForHash(p->getKey().hash());
                if (i != hashbucket) {
                    throw std::logic_error("HashTable::visit: inconsistency "
                            "between StoredValue's calculated hashbucket "
                            "(which is " + std::to_string(hashbucket) +
                            ") and bucket it is located in (which is " +
                            std::to_string(i) + ")");
            visitor.visit(i, depth, mem);
// Pause/resume-capable visitation: visits values starting from `start_pos`
// (when still valid for the current table size), releasing the stripe
// mutex between buckets, and returns the Position at which a later call
// should resume (endPosition() once the whole table has been covered).
HashTable::pauseResumeVisit(PauseResumeHashTableVisitor& visitor,
                            Position& start_pos) {
    if ((numItems.load() + numTempItems.load()) == 0 || !isActive()) {
        return endPosition();
    // To attempt to minimize the impact the visitor has on normal frontend
    // operations, we deliberately acquire (and release) the mutex between
    // each hash_bucket - see `lh` in the inner for() loop below. This means we
    // hold a given mutex for a large number of short durations, instead of just
    // one single, long duration.
    // *However*, there is a potential race with this approach - the {size} of
    // the HashTable may be changed (by the Resizer task) between us first
    // reading it to calculate the starting hash_bucket, and then reading it
    // inside the inner for() loop. To prevent this race, we explicitly acquire
    // (any) mutex, increment {visitors} and then release the mutex. This
    // avoids the race as if visitors > 0 then Resizer will not attempt to
    // resize.
    std::unique_lock<std::mutex> lh(mutexes[0]);
    VisitorTracker vt(&visitors);
    // Start from the requested lock number if in range.
    size_t lock = (start_pos.lock < n_locks) ? start_pos.lock : 0;
    size_t hash_bucket = 0;
    for (; isActive() && !paused && lock < n_locks; lock++) {
        // If the bucket position is *this* lock, then start from the
        // recorded bucket (as long as we haven't resized).
        if (start_pos.lock == lock &&
            start_pos.ht_size == size &&
            start_pos.hash_bucket < size) {
            hash_bucket = start_pos.hash_bucket;
        // Iterate across all values in the hash buckets owned by this lock.
        // Note: we don't record how far into the bucket linked-list we
        // pause at; so any restart will begin from the next bucket.
        for (; !paused && hash_bucket < size; hash_bucket += n_locks) {
            LockHolder lh(mutexes[lock]);
            StoredValue* v = values[hash_bucket].get();
            while (!paused && v) {
                StoredValue* tmp = v->next.get();
                paused = !visitor.visit(*v);
        // If the visitor paused us before we visited all hash buckets owned
        // by this lock, we don't want to skip the remaining hash buckets, so
        // stop the outer for loop from advancing to the next lock.
        if (paused && hash_bucket < size) {
        // Finished all buckets owned by this lock. Set hash_bucket to 'size'
        // to give a consistent marker for "end of lock".
    // Return the *next* location that should be visited.
    return HashTable::Position(size, lock, hash_bucket);
620 HashTable::Position HashTable::endPosition() const {
621 return HashTable::Position(size, n_locks, size);
/**
 * Return `x`, substituting the default `d` when `x` is zero.
 */
static inline size_t getDefault(size_t x, size_t d) {
    if (x == 0) {
        return d;
    }
    return x;
}
628 size_t HashTable::getNumBuckets(size_t n) {
629 return getDefault(n, defaultNumBuckets);
632 size_t HashTable::getNumLocks(size_t n) {
633 return getDefault(n, defaultNumLocks);
// Override the process-wide default bucket count used by subsequently
// constructed HashTables.
// NOTE(review): the assignment appears to sit inside a validity guard on
// `to` - confirm which values are rejected before relying on it.
void HashTable::setDefaultNumBuckets(size_t to) {
        defaultNumBuckets = to;
// Override the process-wide default lock-stripe count used by subsequently
// constructed HashTables.
// NOTE(review): the assignment appears to sit inside a validity guard on
// `to` - confirm which values are rejected before relying on it.
void HashTable::setDefaultNumLocks(size_t to) {
        defaultNumLocks = to;
// Evict `vptr` under the caller-held bucket lock, per `policy`:
// VALUE_ONLY drops just the value payload (metadata stays resident);
// otherwise (full eviction) the whole StoredValue is unlinked and
// destroyed when eligible. Updates ejection / residency / datatype stats;
// ineligible candidates bump numFailedEjects. Throws on a null vptr.
bool HashTable::unlocked_ejectItem(StoredValue*& vptr,
                                   item_eviction_policy_t policy) {
    if (vptr == nullptr) {
        throw std::invalid_argument("HashTable::unlocked_ejectItem: "
                "Unable to delete NULL StoredValue");
    if (policy == VALUE_ONLY) {
        // Value-only: drop the payload, keep the metadata in the table.
        bool rv = vptr->ejectValue(*this, policy);
            ++stats.numValueEjects;
            ++numNonResidentItems;
            ++stats.numFailedEjects;
    } else { // full eviction.
        if (vptr->eligibleForEviction(policy)) {
            StoredValue::reduceMetaDataSize(*this, stats,
                                            vptr->metaDataSize());
            StoredValue::reduceCacheSize(*this, vptr->size());
            int bucket_num = getBucketForHash(vptr->getKey().hash());
            // Remove the item from the hash table.
            auto removed = hashChainRemoveFirst(
                    [vptr](const StoredValue* v) { return v == vptr; });
            if (removed->isResident()) {
                ++stats.numValueEjects;
            if (!removed->isResident() && !removed->isTempItem()) {
                decrNumNonResidentItems(); // Decrement because the item is
            decrNumItems(); // Decrement because the item is fully evicted.
            --datatypeCounts[vptr->getDatatype()];
            updateMaxDeletedRevSeqno(vptr->getRevSeqno());
            ++stats.numFailedEjects;
// Scan the given slot's chain (under its bucket lock) and return the first
// resident, non-deleted, non-temp value as a freshly-allocated Item.
std::unique_ptr<Item> HashTable::getRandomKeyFromSlot(int slot) {
    auto lh = getLockedBucket(slot);
    for (StoredValue* v = values[slot].get(); v; v = v->next.get()) {
        if (!v->isTempItem() && !v->isDeleted() && v->isResident()) {
            return v->toItem(false, 0);
// Restore the value for non-resident StoredValue `v` from a completed
// background fetch. Caller must hold the bucket lock; bails out (returning
// false) when the lock isn't held, the table is inactive, or the value is
// already resident. Adjusts residency/datatype stats and cache size.
bool HashTable::unlocked_restoreValue(
        const std::unique_lock<std::mutex>& htLock,
    if (!htLock || !isActive() || v.isResident()) {
    if (v.isTempInitialItem()) { // Regular item with the full eviction
        /* set it back to false as we created a temp item by setting it to true
           when bg fetch is scheduled (full eviction mode). */
        v.setNewCacheItem(false);
        ++datatypeCounts[itm.getDataType()];
        decrNumNonResidentItems();
    StoredValue::increaseCacheSize(*this, v.getValue()->length());
// Restore metadata (from a completed background fetch) into temp item `v`
// under the caller-held bucket lock. Throws if the lock isn't held or the
// table is inactive. Non-deleted items start counting toward the
// non-resident and datatype statistics.
void HashTable::unlocked_restoreMeta(const std::unique_lock<std::mutex>& htLock,
    throw std::invalid_argument(
            "HashTable::unlocked_restoreMeta: htLock "
        throw std::logic_error(
                "HashTable::unlocked_restoreMeta: Cannot "
                "call on a non-active HT object");
    if (!itm.isDeleted()) {
        ++numNonResidentItems;
        ++datatypeCounts[v.getDatatype()];
// Dump the whole table - a header with aggregate counts followed by every
// StoredValue (one per line) - for debugging.
std::ostream& operator<<(std::ostream& os, const HashTable& ht) {
    os << "HashTable[" << &ht << "] with"
       << " numInMemory:" << ht.getNumInMemoryItems()
       << " numDeleted:" << ht.getNumDeletedItems()
       << " values: " << std::endl;
    for (const auto& chain : ht.values) {
        for (StoredValue* sv = chain.get(); sv != nullptr;
             sv = sv->next.get()) {
            os << " " << *sv << std::endl;