1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 * Copyright 2016 Couchbase, Inc
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "hash_table.h"
20 #include "stored_value_factories.h"
// Table of hash-table sizes (all prime) used by resize(): each entry is
// roughly double the previous, so the table grows/shrinks geometrically.
// Primes reduce clustering when the bucket index is (hash % size).
// NOTE(review): resize() tests for a -1 terminator; the tail of this array
// (including that sentinel) is not visible in this view - confirm it exists.
static const ssize_t prime_size_table[] = {
    3, 7, 13, 23, 47, 97, 193, 383, 769, 1531, 3079, 6143, 12289, 24571, 49157,
    98299, 196613, 393209, 786433, 1572869, 3145721, 6291449, 12582917,
    25165813, 50331653, 100663291, 201326611, 402653189, 805306357,
32 std::ostream& operator<<(std::ostream& os, const HashTable::Position& pos) {
33 os << "{lock:" << pos.lock << " bucket:" << pos.hash_bucket << "/" << pos.ht_size << "}";
// Construct a HashTable: zero the statistics counters, record the initial
// size and the StoredValue factory, then allocate one mutex per lock stripe.
// NOTE(review): several parameters, member initialisers and body lines are
// elided in this view - the fragments below are reproduced verbatim.
HashTable::HashTable(EPStats& st,
                     std::unique_ptr<AbstractStoredValueFactory> svFactory,
    : maxDeletedRevSeqno(0),
      numNonResidentItems(0),
      initialSize(initialSize),
      valFact(std::move(svFactory)),
    // Lock stripes are raw-allocated; presumably freed in ~HashTable -
    // TODO confirm (a std::vector<std::mutex> would make this automatic).
    mutexes = new std::mutex[n_locks];
// Destroy the table. Clears contents without taking the per-bucket locks
// (no other thread may be mutating a table that is being destroyed), then
// spin-waits until all in-flight visitors have drained.
HashTable::~HashTable() {
    // Use unlocked clear for the destructor, avoids lock inversions on VBucket
    // Wait for any outstanding visitors to finish.
    // NOTE(review): busy-wait on the atomic visitor count; assumes visitors
    // finish promptly.
    while (visitors > 0) {
// Remove every item from the table.
//
// @param deactivate if true, the table is also marked inactive; if false
//        the table must already be active (the guard below throws
//        logic_error otherwise - its `if` condition is elided in this view).
void HashTable::clear(bool deactivate) {
    // If not deactivating, assert we're already active.
    throw std::logic_error("HashTable::clear: Cannot call on a "
    // Hold every lock stripe so no concurrent mutation can race the clear.
    MultiLockHolder mlh(mutexes, n_locks);
    clear_UNLOCKED(deactivate);
// Clear implementation; caller must already hold all the lock stripes
// (see clear()). Walks every bucket chain, releasing each StoredValue and
// accumulating how much memory was freed, then resets the per-table
// statistics counters.
void HashTable::clear_UNLOCKED(bool deactivate) {
    setActiveState(false);
    size_t clearedMemSize = 0;
    size_t clearedValSize = 0;
    for (int i = 0; i < (int)size; i++) {
        // Take ownership of the StoredValue from the vector, update
        // statistics and release it.
        auto v = std::move(values[i]);
        clearedMemSize += v->size();
        clearedValSize += v->valuelen();
        values[i] = std::move(v->getNext());
    // Only the metadata/overhead portion is subtracted here; presumably the
    // value bytes are accounted in a separate counter - TODO confirm.
    stats.currentSize.fetch_sub(clearedMemSize - clearedValSize);
    // Reset all per-table counters now the table is empty.
    datatypeCounts.fill(0);
    numTotalItems.store(0);
    numTempItems.store(0);
    numNonResidentItems.store(0);
/**
 * Absolute difference |a - b| of two unsigned sizes (safe against
 * unsigned underflow because the larger value is always the minuend).
 */
static size_t distance(size_t a, size_t b) {
    return std::max(a, b) - std::min(a, b);
}
/**
 * Return whichever of {a, b} is closer to n; b wins ties.
 * Used by resize() to pick between two candidate prime table sizes.
 * (Self-contained: the absolute differences are computed inline.)
 */
static size_t nearest(size_t n, size_t a, size_t b) {
    const size_t distToA = std::max(n, a) - std::min(n, a);
    const size_t distToB = std::max(n, b) - std::min(n, b);
    return (distToA < distToB) ? a : b;
}
/**
 * Is `size` (the current, unsigned table size) equal to either signed
 * candidate a or b? Used by resize() to keep the current size when it is
 * already one of the two nearest primes.
 */
static bool isCurrently(size_t size, ssize_t a, ssize_t b) {
    ssize_t current(static_cast<ssize_t>(size));
    return (current == a || current == b);
}
// Pick a new table size appropriate for the current item count and resize
// to it. Scans prime_size_table for the pair of primes bracketing the
// in-memory item count, then chooses between them (preferring stability,
// the initial size, or the nearer prime).
void HashTable::resize() {
    size_t ni = getNumInMemoryItems();
    // Figure out where in the prime table we are.
    ssize_t target(static_cast<ssize_t>(ni));
    for (i = 0; prime_size_table[i] > 0 && prime_size_table[i] < target; ++i) {
    if (prime_size_table[i] == -1) {
        // We're at the end, take the biggest
        new_size = prime_size_table[i-1];
    } else if (prime_size_table[i] < static_cast<ssize_t>(initialSize)) {
        // Was going to be smaller than the initial size.
        new_size = initialSize;
        new_size = prime_size_table[i];
    }else if (isCurrently(size, prime_size_table[i-1], prime_size_table[i])) {
        // If one of the candidate sizes is the current size, maintain
        // the current size in order to remain stable.
        // Somewhere in the middle, use the one we're closer to.
        new_size = nearest(ni, prime_size_table[i-1], prime_size_table[i]);
// Resize the table to exactly newSize buckets, re-hashing every chain into
// the new bucket array. No-ops (via elided early returns) when the size is
// unchanged, too large, or when visitors are active; throws if the table
// is not active.
void HashTable::resize(size_t newSize) {
        throw std::logic_error("HashTable::resize: Cannot call on a "
                               "non-active object");

    // Due to the way hashing works, we can't fit anything larger than
    // an int's worth of buckets.
    if (newSize > static_cast<size_t>(std::numeric_limits<int>::max())) {

    // Don't resize to the same size, either.
    if (newSize == size) {

    // Hold every stripe so no mutation can race the rehash.
    MultiLockHolder mlh(mutexes, n_locks);
    if (visitors.load() > 0) {
        // Do not allow a resize while any visitors are actually
        // processing. The next attempt will have to pick it up. New
        // visitors cannot start doing meaningful work (we own all
        // locks at this point).

    // Get a place for the new items.
    table_type newValues(newSize);

    // Overhead accounting: subtract the old table's footprint now, add the
    // new one after the swap below.
    stats.memOverhead->fetch_sub(memorySize());

    // Set the new size so all the hashy stuff works.
    size_t oldSize = size;

    // Move existing records into the new space.
    for (size_t i = 0; i < oldSize; i++) {
        // unlink the front element from the hash chain at values[i].
        auto v = std::move(values[i]);
        values[i] = std::move(v->getNext());

        // And re-link it into the correct place in newValues.
        int newBucket = getBucketForHash(v->getKey().hash());
        v->setNext(std::move(newValues[newBucket]));
        newValues[newBucket] = std::move(v);

    // Finally assign the new table to values.
    values = std::move(newValues);

    stats.memOverhead->fetch_add(memorySize());
// Locking lookup: acquire the bucket lock for `key` and delegate to
// unlocked_find().
//
// @param key            document key to look up
// @param trackReference whether to bump the value's reference/NRU tracking
// @param wantsDeleted   whether a deleted value should still be returned
// @return the matching StoredValue, per unlocked_find()'s contract
// @throws logic_error if the table is not active (guard condition elided
//         in this view)
StoredValue* HashTable::find(const DocKey& key,
                             TrackReference trackReference,
                             WantsDeleted wantsDeleted) {
        throw std::logic_error("HashTable::find: Cannot call on a "
                               "non-active object");
    HashBucketLock hbl = getLockedBucket(key);
    return unlocked_find(key, hbl.getBucketNum(), wantsDeleted, trackReference);
// Return an Item for a pseudo-random key: starting at the slot selected by
// `rnd`, probe successive slots (wrapping) until one yields a candidate or
// we arrive back at the start.
std::unique_ptr<Item> HashTable::getRandomKey(long rnd) {
    /* Try to locate a partition */
    size_t start = rnd % size;
    std::unique_ptr<Item> ret;
        // Loop body of an elided do { ... } while: probe the next slot.
        ret = getRandomKeyFromSlot(curr++);
    } while (ret == NULL && curr != start);
// Store `val` in the table: update the existing StoredValue if the key is
// already present, otherwise add a new one.
//
// @return NoMem if memory quota would be exceeded; otherwise the status
//         from unlocked_updateStoredValue(), or WasClean for a fresh add.
MutationStatus HashTable::set(Item& val) {
    if (!StoredValue::hasAvailableSpace(stats, val, false)) {
        return MutationStatus::NoMem;

    HashBucketLock hbl = getLockedBucket(val.getKey());
    // Look up under the bucket lock (remaining find arguments elided here).
    StoredValue* v = unlocked_find(val.getKey(),
        return unlocked_updateStoredValue(hbl.getHTLock(), *v, val);
    unlocked_addNewStoredValue(hbl, val);
    return MutationStatus::WasClean;
// Replace the value of an existing StoredValue `v` with the contents of
// `itm`, keeping the resident/temp/deleted and datatype statistics
// consistent. Caller must hold the bucket lock (`htLock`).
//
// @return WasDirty if v had unpersisted changes, else WasClean.
MutationStatus HashTable::unlocked_updateStoredValue(
        const std::unique_lock<std::mutex>& htLock,
        throw std::invalid_argument(
                "HashTable::unlocked_updateStoredValue: htLock "
        throw std::logic_error(
                "HashTable::unlocked_updateStoredValue: Cannot "
                "call on a non-active HT object");

    // Record whether v was dirty *before* mutating it.
    MutationStatus status =
            v.isDirty() ? MutationStatus::WasDirty : MutationStatus::WasClean;
    // The old value was paged out: it will become resident again below.
    if (!v.isResident() && !v.isDeleted() && !v.isTempItem()) {
        decrNumNonResidentItems();
    if (v.isTempItem()) {
    // Deleted -> live and live -> deleted transitions adjust counters in the
    // elided branches.
    if (v.isDeleted() && !itm.isDeleted()) {
    if (!v.isDeleted() && itm.isDeleted()) {

    // If the item we are replacing is resident then we need to make sure we
    // appropriately alter the datatype stats.
    if (v.getDatatype() != itm.getDataType()) {
        --datatypeCounts[v.getDatatype()];
        ++datatypeCounts[itm.getDataType()];

    /* setValue() will mark v as undeleted if required */
    v.setValue(itm, *this);
// Create a brand-new StoredValue for `itm` and insert it at the head of
// the appropriate bucket chain. Caller must hold the bucket lock (`hbl`).
//
// @return pointer to the newly-inserted StoredValue (owned by the table).
StoredValue* HashTable::unlocked_addNewStoredValue(const HashBucketLock& hbl,
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_addNewStoredValue: htLock "
        // NOTE(review): sibling methods throw logic_error for the
        // "non-active" case; invalid_argument here looks inconsistent.
        throw std::invalid_argument(
                "HashTable::unlocked_addNewStoredValue: Cannot "
                "call on a non-active HT object");

    // Create a new StoredValue and link it into the head of the bucket chain.
    // The factory splices the existing chain head in as the new value's next.
    auto v = (*valFact)(itm, std::move(values[hbl.getBucketNum()]), *this);
    if (v->isTempItem()) {
    ++datatypeCounts[v->getDatatype()];
    if (v->isDeleted()) {
    values[hbl.getBucketNum()] = std::move(v);

    return values[hbl.getBucketNum()].get();
// Replace an existing StoredValue with a fresh copy of itself: the
// original is released (removed) from the table and a copy is linked in at
// the head of the chain. Caller must hold the bucket lock (`hbl`).
//
// @return {pointer to the new in-table copy, owning pointer to the
//          released original}.
std::pair<StoredValue*, StoredValue::UniquePtr>
HashTable::unlocked_replaceByCopy(const HashBucketLock& hbl,
                                  const StoredValue& vToCopy) {
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_replaceByCopy: htLock "
        throw std::invalid_argument(
                "HashTable::unlocked_replaceByCopy: Cannot "
                "call on a non-active HT object");

    /* Release (remove) the StoredValue from the hash table */
    auto releasedSv = unlocked_release(hbl, vToCopy.getKey());

    /* Copy the StoredValue and link it into the head of the bucket chain. */
    auto newSv = valFact->copyStoredValue(
            vToCopy, std::move(values[hbl.getBucketNum()]), *this);
    if (newSv->isTempItem()) {
    values[hbl.getBucketNum()] = std::move(newSv);

    return {values[hbl.getBucketNum()].get(), std::move(releasedSv)};
// Mark StoredValue `v` as deleted (soft delete), adjusting the resident /
// temp / datatype statistics. Caller must hold the bucket lock (`htLock`).
//
// @param onlyMarkDeleted if true, only flag the value deleted (value body
//        handling happens in the elided branch).
void HashTable::unlocked_softDelete(const std::unique_lock<std::mutex>& htLock,
                                    bool onlyMarkDeleted) {
    // Capture the prior state before any mutation below.
    const bool alreadyDeleted = v.isDeleted();
    if (!v.isResident() && !v.isDeleted() && !v.isTempItem()) {
        decrNumNonResidentItems();
    // Deleted values are not counted in the datatype histogram.
    --datatypeCounts[v.getDatatype()];

    if (onlyMarkDeleted) {
    if (v.isTempItem()) {
    if (!alreadyDeleted) {
// Walk the chain of the given bucket looking for `key`. Caller must hold
// the bucket's lock.
//
// @param wantsDeleted   if Yes, a deleted value is still returned
// @param trackReference if Yes, bump reference tracking on a live match
// @return the matching StoredValue, or (in the elided tail) presumably
//         null when the key is absent - TODO confirm.
StoredValue* HashTable::unlocked_find(const DocKey& key,
                                      WantsDeleted wantsDeleted,
                                      TrackReference trackReference) {
    for (StoredValue* v = values[bucket_num].get(); v; v = v->getNext().get()) {
        if (v->hasKey(key)) {
            if (trackReference == TrackReference::Yes && !v->isDeleted()) {
            if (wantsDeleted == WantsDeleted::Yes || !v->isDeleted()) {
418 void HashTable::unlocked_del(const HashBucketLock& hbl, const DocKey& key) {
419 unlocked_release(hbl, key).reset();
// Unlink the StoredValue with `key` from its bucket chain and hand
// ownership to the caller, updating cache-size / metadata / datatype
// statistics. Caller must hold the bucket lock (`hbl`).
//
// NOTE(review): the exception messages below still say "unlocked_remove" /
// "unlocked_del" - presumably stale names from before a rename to
// unlocked_release; left unchanged here as they are runtime strings.
StoredValue::UniquePtr HashTable::unlocked_release(
        const HashBucketLock& hbl, const DocKey& key) {
    if (!hbl.getHTLock()) {
        throw std::invalid_argument(
                "HashTable::unlocked_remove: htLock "
        throw std::logic_error(
                "HashTable::unlocked_remove: Cannot call on a "
                "non-active object");

    // Remove the first (should only be one) StoredValue with the given key.
    auto released = hashChainRemoveFirst(
            values[hbl.getBucketNum()],
            [key](const StoredValue* v) { return v->hasKey(key); });
        /* We shouldn't reach here, we must delete the StoredValue in the
        throw std::logic_error(
                "HashTable::unlocked_del: StoredValue to be deleted "
                "not found in HashTable; possibly HashTable leak");

    // Update statistics now the item has been removed.
    StoredValue::reduceCacheSize(*this, released->size());
    StoredValue::reduceMetaDataSize(*this, stats, released->metaDataSize());
    if (released->isTempItem()) {
    --datatypeCounts[released->getDatatype()];
    if (released->isDeleted()) {
// Visit every StoredValue in the table, stripe by stripe, invoking
// visitor.visit() on each. Aborts early when the visitor's
// shouldContinue() returns false or the table becomes inactive.
void HashTable::visit(HashTableVisitor &visitor) {
    // Nothing to visit in an empty or inactive table.
    if ((numItems.load() + numTempItems.load()) == 0 || !isActive()) {

    // Acquire one (any) of the mutexes before incrementing {visitors}, this
    // prevents any race between this visitor and the HashTable resizer.
    // See comments in pauseResumeVisit() for further details.
    std::unique_lock<std::mutex> lh(mutexes[0]);
    VisitorTracker vt(&visitors);

    bool aborted = !visitor.shouldContinue();
    for (int l = 0; isActive() && !aborted && l < static_cast<int>(n_locks);
        // Buckets owned by lock stripe l are those congruent to l mod n_locks.
        for (int i = l; i < static_cast<int>(size); i+= n_locks) {
            // (re)acquire mutex on each HashBucket, to minimise any impact
            // on front-end threads.
            HashBucketLock lh(i, mutexes[l]);

            StoredValue* v = values[i].get();
                // TODO: Perf: This check seems costly - do we think it's still
                auto hashbucket = getBucketForHash(v->getKey().hash());
                if (i != hashbucket) {
                    // NOTE(review): "bucket is is located" below is a typo for
                    // "it is located"; unchanged here as it is a runtime string.
                    throw std::logic_error("HashTable::visit: inconsistency "
                            "between StoredValue's calculated hashbucket "
                            "(which is " + std::to_string(hashbucket) +
                            ") and bucket is is located in (which is " +
                            std::to_string(i) + ")");
                // Capture the successor first: the visitor may mutate/remove v.
                StoredValue* tmp = v->getNext().get();
                visitor.visit(lh, v);
            aborted = !visitor.shouldContinue();
// Visit every bucket, reporting each bucket's chain depth (and memory) to
// the depth visitor. Unlike visit(), each stripe lock is held for the
// whole pass over its buckets.
void HashTable::visitDepth(HashTableDepthVisitor &visitor) {
    if (numItems.load() == 0 || !isActive()) {

    VisitorTracker vt(&visitors);

    for (int l = 0; l < static_cast<int>(n_locks); l++) {
        LockHolder lh(mutexes[l]);
        // Buckets owned by stripe l: indices congruent to l mod n_locks.
        for (int i = l; i < static_cast<int>(size); i+= n_locks) {

            StoredValue* p = values[i].get();
                // TODO: Perf: This check seems costly - do we think it's still
                auto hashbucket = getBucketForHash(p->getKey().hash());
                if (i != hashbucket) {
                    // NOTE(review): message names HashTable::visit but this is
                    // visitDepth; unchanged here as it is a runtime string.
                    throw std::logic_error("HashTable::visit: inconsistency "
                            "between StoredValue's calculated hashbucket "
                            "(which is " + std::to_string(hashbucket) +
                            ") and bucket it is located in (which is " +
                            std::to_string(i) + ")");
                p = p->getNext().get();
            // Report this bucket's accumulated depth/memory totals.
            visitor.visit(i, depth, mem);
// Pausable table visit: walks values stripe-by-stripe starting from
// start_pos, stopping when the visitor asks to pause. Returns the Position
// the next call should resume from (endPosition() once complete).
// (Return type `HashTable::Position` is declared on a line above this view.)
HashTable::pauseResumeVisit(PauseResumeHashTableVisitor& visitor,
                            Position& start_pos) {
    // Empty or inactive table: report completion immediately.
    if ((numItems.load() + numTempItems.load()) == 0 || !isActive()) {
        return endPosition();

    // To attempt to minimize the impact the visitor has on normal frontend
    // operations, we deliberately acquire (and release) the mutex between
    // each hash_bucket - see `lh` in the inner for() loop below. This means we
    // hold a given mutex for a large number of short durations, instead of just
    // one single, long duration.
    // *However*, there is a potential race with this approach - the {size} of
    // the HashTable may be changed (by the Resizer task) between us first
    // reading it to calculate the starting hash_bucket, and then reading it
    // inside the inner for() loop. To prevent this race, we explicitly acquire
    // (any) mutex, increment {visitors} and then release the mutex. This
    // avoids the race, as if visitors > 0 then the Resizer will not attempt
    // to resize.
    std::unique_lock<std::mutex> lh(mutexes[0]);
    VisitorTracker vt(&visitors);

    // Start from the requested lock number if in range.
    size_t lock = (start_pos.lock < n_locks) ? start_pos.lock : 0;
    size_t hash_bucket = 0;

    for (; isActive() && !paused && lock < n_locks; lock++) {

        // If the bucket position is *this* lock, then start from the
        // recorded bucket (as long as we haven't resized).
        if (start_pos.lock == lock &&
            start_pos.ht_size == size &&
            start_pos.hash_bucket < size) {
            hash_bucket = start_pos.hash_bucket;

        // Iterate across all values in the hash buckets owned by this lock.
        // Note: we don't record how far into the bucket linked-list we
        // pause at; so any restart will begin from the next bucket.
        for (; !paused && hash_bucket < size; hash_bucket += n_locks) {
            LockHolder lh(mutexes[lock]);

            StoredValue* v = values[hash_bucket].get();
            while (!paused && v) {
                // Capture the successor first: the visitor may remove v.
                StoredValue* tmp = v->getNext().get();
                paused = !visitor.visit(*v);

        // If the visitor paused us before we visited all hash buckets owned
        // by this lock, we don't want to skip the remaining hash buckets, so
        // stop the outer for loop from advancing to the next lock.
        if (paused && hash_bucket < size) {

        // Finished all buckets owned by this lock. Set hash_bucket to 'size'
        // to give a consistent marker for "end of lock".

    // Return the *next* location that should be visited.
    return HashTable::Position(size, lock, hash_bucket);
616 HashTable::Position HashTable::endPosition() const {
617 return HashTable::Position(size, n_locks, size);
// Evict the given StoredValue according to `policy`. VALUE_ONLY ejects just
// the value body (metadata stays resident); otherwise (full eviction) the
// whole StoredValue is removed from the table if eligible. Eviction stats
// are updated on both the success and failure paths.
bool HashTable::unlocked_ejectItem(StoredValue*& vptr,
                                   item_eviction_policy_t policy) {
    if (vptr == nullptr) {
        throw std::invalid_argument("HashTable::unlocked_ejectItem: "
                                    "Unable to delete NULL StoredValue");
    if (policy == VALUE_ONLY) {
        bool rv = vptr->ejectValue(*this, policy);
            // Value ejected: it is now non-resident.
            ++stats.numValueEjects;
            ++numNonResidentItems;
            // Ejection refused (elided branch).
            ++stats.numFailedEjects;
    } else { // full eviction.
        if (vptr->eligibleForEviction(policy)) {
            // Account for the memory being released before unlinking.
            StoredValue::reduceMetaDataSize(*this, stats,
                                            vptr->metaDataSize());
            StoredValue::reduceCacheSize(*this, vptr->size());
            int bucket_num = getBucketForHash(vptr->getKey().hash());

            // Remove the item from the hash table.
            // (`removed` takes ownership; vptr still points at it below.)
            auto removed = hashChainRemoveFirst(
                    [vptr](const StoredValue* v) { return v == vptr; });

            if (removed->isResident()) {
                ++stats.numValueEjects;
            if (!removed->isResident() && !removed->isTempItem()) {
                decrNumNonResidentItems(); // Decrement because the item is
            decrNumItems(); // Decrement because the item is fully evicted.
            --datatypeCounts[vptr->getDatatype()];
            updateMaxDeletedRevSeqno(vptr->getRevSeqno());
            // Not eligible for full eviction (elided branch).
            ++stats.numFailedEjects;
// Scan one bucket (slot) for a random-key candidate: returns an Item built
// from the first resident, non-deleted, non-temp value in the chain.
// (The not-found return path is elided from this view.)
std::unique_ptr<Item> HashTable::getRandomKeyFromSlot(int slot) {
    // Lock just this bucket while walking its chain.
    auto lh = getLockedBucket(slot);
    for (StoredValue* v = values[slot].get(); v; v = v->getNext().get()) {
        if (!v->isTempItem() && !v->isDeleted() && v->isResident()) {
            return v->toItem(false, 0);
// Restore a value (background-fetched from disk) into an existing
// non-resident StoredValue `v`. Bails out early (elided return) if the
// lock is not held, the table is inactive, or v is already resident.
bool HashTable::unlocked_restoreValue(
        const std::unique_lock<std::mutex>& htLock,
    if (!htLock || !isActive() || v.isResident()) {

    if (v.isTempInitialItem()) { // Regular item with the full eviction
        /* set it back to false as we created a temp item by setting it to true
           when bg fetch is scheduled (full eviction mode). */
        v.setNewCacheItem(false);
        // Item becomes a proper resident value: track its datatype.
        ++datatypeCounts[itm.getDataType()];
        // A previously non-resident value is now being made resident.
        decrNumNonResidentItems();
    // Account for the restored value's bytes in the cache-size stat.
    StoredValue::increaseCacheSize(*this, v.getValue()->length());
// Restore metadata from a background-fetched item into a temp StoredValue.
// Caller must hold the bucket lock (`htLock`); throws if it doesn't, or if
// the table is not active (guard conditions elided in this view).
void HashTable::unlocked_restoreMeta(const std::unique_lock<std::mutex>& htLock,
        throw std::invalid_argument(
                "HashTable::unlocked_restoreMeta: htLock "
        throw std::logic_error(
                "HashTable::unlocked_restoreMeta: Cannot "
                "call on a non-active HT object");

    if (!itm.isDeleted()) {
        // Metadata restored but value still on disk: the item counts as
        // non-resident, and its datatype joins the histogram.
        ++numNonResidentItems;
        ++datatypeCounts[v.getDatatype()];
// Debug-dump an entire HashTable: headline statistics followed by every
// bucket chain's StoredValues, one per line.
// (The end of this function lies beyond the visible view.)
std::ostream& operator<<(std::ostream& os, const HashTable& ht) {
    os << "HashTable[" << &ht << "] with"
       << " numInMemory:" << ht.getNumInMemoryItems()
       << " numDeleted:" << ht.getNumDeletedItems()
       << " values: " << std::endl;
    for (const auto& chain : ht.values) {
        for (StoredValue* sv = chain.get(); sv != nullptr;
             sv = sv->getNext().get()) {
            os << "    " << *sv << std::endl;