Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
yasi/YASI_12/ds.HopScotchHashTable.h
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
433 lines (374 sloc)
11.7 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include "common.h" | |
#include "ds.IntLinearProbingHashTable.h" | |
#include <bitset> | |
// enable-disable testing classes in this file | |
#include "test.this.module.h" | |
using namespace std; | |
namespace yasi{ | |
namespace ds{ | |
template<class Value> | |
struct HopScotchEntry{ | |
typedef HopScotchEntry<Value> self; | |
size_t key; | |
Value value; | |
unsigned int bitField; // map of bucket elements | |
HopScotchEntry() : key(0), bitField(0){} | |
HopScotchEntry(const size_t& k) : key(k){} | |
HopScotchEntry(const size_t& k, const Value& v) : key(k), value(v){} | |
HopScotchEntry(const size_t& k, const Value& v, const unsigned int& bf) : | |
key(k), value(v), bitField(bf){} | |
HopScotchEntry(const self& other):key(other.key), value(other.value),bitField(other.bitField){ } | |
self& operator=(const self& other){ | |
key = other.key; value = other.value; bitField = other.bitField; | |
return *this; | |
} | |
}; | |
// override for printing | |
template<class Value> | |
std::ostream& operator<< (std::ostream& out, const HopScotchEntry<Value>& kv) { | |
out << kv.key; | |
return out; | |
} | |
template<class Value> | |
std::ostream& operator<< (std::ostream& out, const HopScotchEntry<Value>* pKv) { | |
out << pKv->key; | |
return out; | |
} | |
// a hash table with integer key | |
template< | |
class Value = size_t, | |
class HashFunction = IntHashFunction<int>, | |
class EntryType = HopScotchEntry<Value > // must have fields `key', `value', and `bitField' | |
> | |
class HopScotchHashTable | |
: public IntLinearProbingHashTable < | |
Value, HashFunction, EntryType > { | |
///////////////// enable testing /////////////// | |
FRIEND_TEST_CLASS( HopScotchHashTableTest); | |
template<typename A, typename B, typename C> | |
FRIEND_TEST_CLASS( LinearProbingHashTableTestBase); | |
public: | |
typedef size_t Key; | |
typedef EntryType BucketType; // each bucket holds an EntryType object | |
typedef EntryType Pair; // which is actually a key-value pair | |
typedef EntryType* BucketEntryPtr; // ptr to an entry object | |
typedef Key KeyType; | |
typedef Value ValueType; | |
protected: | |
typedef IntLinearProbingHashTable < Value, HashFunction, HopScotchEntry<Value> > | |
base; | |
//---------- HopScotch Properties/Methods ---------- | |
static const unsigned int INIT_H = 4; | |
size_t H; // the neighborhood size | |
size_t logH; // log of H | |
size_t _zeroKeyBucket; // the bucket containing (hashed) zero key | |
const int SCAN_BUCKET_PREV = -1; | |
const int SCAN_BUCKET_NEXT = 1; | |
#if _DEBUG | |
int _numHops; | |
#endif // _DEBUG | |
public: | |
virtual ~HopScotchHashTable(){} | |
HopScotchHashTable(unsigned int logSize = INIT_LOGSIZE, unsigned int H = INIT_H) :H(H), IntLinearProbingHashTable(logSize){ | |
assert(_size >= H); | |
assert(IS_POW_OF_TWO(H) == true); | |
logH = whichPowerOfTwo(H); | |
#if _DEBUG | |
_numHops = 0; | |
#endif | |
} | |
protected: | |
// we should find something within first H-1 trials | |
// if not found, returns NULL | |
#if YASI_ALLOW_ZERO_KEY | |
virtual inline bool isNull(const int bucket) const{ | |
if (table[bucket].key != 0){ | |
// key non-zero | |
return false; | |
} | |
// key is zero | |
else if (_zeroUsed == false){ | |
// zero element not used | |
return true; | |
} | |
else{ | |
// zero element used | |
if (bucket == _zeroKeyBucket) | |
// this is the zeroElement, not NULL | |
return false; | |
else | |
return true; | |
} | |
} | |
#else | |
virtual inline bool isNull(const int bucket) const{ | |
return table[bucket].key == 0; | |
} | |
#endif // YASI_ALLOW_ZERO_KEY | |
virtual void copyTable(BucketType* oldTable, const unsigned int oldSize) override{ | |
base::copyTable(oldTable, oldSize); | |
} | |
// swap two bucket entries | |
void swap(const int bucketOne, const int bucketTwo) const{ | |
// do not swap bitmaps | |
EntryType temp = table[bucketOne]; | |
table[bucketOne].key = table[bucketTwo].key; | |
table[bucketOne].value = table[bucketTwo].value; | |
table[bucketTwo].key = temp.key; | |
table[bucketTwo].value = temp.value; | |
} | |
inline void setBitmap(const unsigned int bucket, const unsigned int position, const unsigned int bit) const{ | |
if (bucket >= 0 && bucket < _size && | |
position >= 0 && position < H && | |
bit <= 1){ | |
table[bucket].bitField &= (0x80000000 >> position); | |
} | |
} | |
// returns the next/prev item in bucket | |
// if no more items, returns -1 | |
// in error, returns -1 | |
inline int jumpInBucket( | |
const unsigned int bitmap, | |
const unsigned int curPosInBucket, | |
const int direction) const{ | |
if (direction != SCAN_BUCKET_NEXT && direction != SCAN_BUCKET_PREV){ | |
cerr << "HopScotchHashTable::nextInBucket(): wrong direction: " << direction << endl; | |
return -1; | |
} | |
if (direction == SCAN_BUCKET_NEXT && (curPosInBucket < 0 ||curPosInBucket >= H - 1)){ | |
cerr << "HopScotchHashTable::nextInBucket(): current pos" << curPosInBucket << " is >= (H-1), which is " << (H - 1) << endl; | |
return -1; | |
} | |
if (direction == SCAN_BUCKET_PREV && (curPosInBucket <= 0 || curPosInBucket >= H)){ | |
cerr << "HopScotchHashTable::nextInBucket(): current pos" << curPosInBucket << " is <= 0" << endl; | |
return -1; | |
} | |
int nextPos = curPosInBucket + direction; | |
unsigned int mask = 0x80000000 >> nextPos; | |
for (; mask != 0; nextPos += direction){ | |
int nextBit = (bitmap & mask); | |
if (nextBit) return nextPos; | |
if (direction == SCAN_BUCKET_NEXT){ | |
mask >>= 1; | |
} | |
else{ | |
// scan prev | |
mask <<= 1; | |
} | |
} | |
// no more items | |
return -1; | |
} | |
// returns the prev item in bucket | |
// if no more items, returns -1 | |
// in error, returns -1 | |
inline int prevPosInBucket(const unsigned int bitmap, const unsigned int curPosInBucket) const{ | |
return jumpInBucket(bitmap, curPosInBucket, SCAN_BUCKET_PREV); | |
} | |
// returns the next item in bucket | |
// if no more items, returns -1 | |
// in error, returns -1 | |
inline int nextPosInBucket(const unsigned int bitmap, const unsigned int curPosInBucket) const{ | |
return jumpInBucket(bitmap, curPosInBucket, SCAN_BUCKET_NEXT); | |
} | |
bool canSwapCurWithEmpty(const int firstBucket, const int cur, const int hashCur, const int emptySlot){ | |
// check boundaries | |
if (firstBucket == emptySlot || | |
firstBucket == cur || | |
cur == emptySlot) { | |
return false; | |
} | |
// in forward direction | |
// current cannot go to the left of firstBucket or right of emptySlot | |
if (firstBucket < emptySlot && // non-circular direction | |
(cur <= firstBucket || cur >= emptySlot) ){ | |
return false; | |
} | |
// in circular direction | |
// current cannot go to the left of firstBucket or right of emptySlot | |
if (firstBucket > emptySlot && // circular direction | |
(cur <= firstBucket && cur >= emptySlot)){ | |
return false; | |
} | |
bool hashCurIsAtRightOfFirstBucket = false; | |
bool hashCurIsAtLeftOfEmptySlot = false; | |
bool emptySlotInNeighborhoodOfHashCur = false; | |
int diffHashCurToEmptySlot = 0; | |
// regular or circular search? | |
if (firstBucket < emptySlot){ | |
// regular search | |
diffHashCurToEmptySlot = emptySlot - hashCur; // circularDiff(hashCur, emptySlot); | |
hashCurIsAtLeftOfEmptySlot = diffHashCurToEmptySlot > 0; | |
hashCurIsAtRightOfFirstBucket = hashCur - firstBucket > 0; | |
emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < (int) H; | |
} | |
else{ | |
// firstBucket >= emptySlot | |
if (firstBucket == emptySlot) return NULL; | |
// circular search | |
diffHashCurToEmptySlot = circularDiff(hashCur, emptySlot); | |
if (hashCur > firstBucket){ | |
hashCurIsAtRightOfFirstBucket = true; | |
hashCurIsAtLeftOfEmptySlot = true; | |
emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < (int) H; | |
} | |
else{ | |
if (hashCur == firstBucket) return NULL; | |
// hashCur < firstBucket | |
if (hashCur < emptySlot){ | |
hashCurIsAtRightOfFirstBucket = true; | |
hashCurIsAtLeftOfEmptySlot = true; | |
emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < (int) H; | |
} | |
else{ | |
// hashCur >= emptySlot | |
return NULL; | |
} | |
} | |
} | |
return hashCurIsAtRightOfFirstBucket && | |
hashCurIsAtLeftOfEmptySlot && | |
emptySlotInNeighborhoodOfHashCur | |
; | |
} | |
// repeatedly push this empty slot to the left until it comes within | |
// the neighborhood of firstBucket | |
// returns the ptr to the slot on success | |
// returns NULL if the pull is unsuccessful | |
BucketType* pullFromLeft(const int firstBucket, const int emptySlot){ | |
//when to stop | |
uint remainingSlots = circularDiff(firstBucket, emptySlot); | |
if (remainingSlots < H){ | |
// the empty slot is within the neighborhood of firstBucket | |
return &table[emptySlot]; | |
} | |
// H | |
// <--------------> | |
// i - - - j - - - [] - - - - | |
// #j | |
// | |
// i is the firstBucket | |
// try to find an item j to the left of emptySlot | |
// which is withing H-1 distable from emptySlot, and | |
// whose hashed index #j is within H-1 distance from emptySlot | |
// and to the right of i | |
// look in the H-1 slots to the left of emptySlot | |
int leftLimit = modSize(emptySlot - H + 1); | |
for (int cur = leftLimit; cur != emptySlot; cur = circularNext(cur)){ | |
size_t curKey = table[cur].key; | |
if (curKey == 0) continue; | |
int hashCur = index(curKey); | |
if (canSwapCurWithEmpty(firstBucket, cur, hashCur, emptySlot)) | |
{ | |
// candidate for swap | |
swap(cur, emptySlot); | |
// calculate bitmap bit positions set/reset | |
int diff1 = circularDiff(hashCur, cur); // before | |
int diff1final = circularDiff(hashCur, emptySlot); // after | |
// update bitmaps | |
setBitmap(hashCur, diff1, 0); | |
setBitmap(hashCur, diff1final, 1); | |
// repeat | |
return pullFromLeft(firstBucket, cur); | |
} | |
} | |
// no candidate for swap found | |
return NULL; | |
} | |
virtual BucketType* insertKey(const Key& k){ | |
if (k == 0){ | |
cerr << "HopScotchHashTable::insertKey(): key 0 is not allowed." << endl; | |
return NULL; | |
} | |
// check whether we need to grow for this new item | |
if (needGrow(_population + 1)) | |
grow(); | |
int firstBucket = index(k); | |
// try to find an empty slot | |
int cur = firstBucket; | |
while (!isNull(cur) ){ | |
cur = circularNext(cur); | |
if (cur == firstBucket) {// we tried all buckets | |
grow(); | |
// time to grow | |
return insertKey(k); | |
} | |
} | |
// found an empty slot | |
uint positionInBucket = circularDiff(firstBucket, cur); | |
if ( positionInBucket < H){ | |
// empty slot within the neighborhood of firstBucket | |
table[cur].key = k; | |
_population++; | |
// set this bit in bitmap of firstBucket | |
setBitmap(firstBucket, positionInBucket, 1); | |
return &table[cur]; | |
} | |
else{ | |
// empty slot out of the immediate neighborhood | |
// cur is the empty bucket | |
// pull it within the immediate neighborhood | |
BucketType* pBucket = pullFromLeft(firstBucket, cur); | |
if (pBucket){ | |
// now cur contains the final position of emptySlot | |
// moved the empty slot to the immediate neighborhood | |
table[cur].key = k; | |
_population++; | |
// set this bit in bitmap of firstBucket | |
int emptyPosFinal = (pBucket - table); | |
positionInBucket = circularDiff(firstBucket, emptyPosFinal); | |
setBitmap(firstBucket, positionInBucket, 1); | |
return &table[cur]; | |
} | |
else{ | |
// could not bring the NULL element back | |
// time to resize and rehash | |
grow(); | |
return insertKey(k); | |
} | |
} | |
} | |
virtual BucketType* lookupKey(const Key& k){ | |
if (k == 0){ | |
cerr << "HopScotchHashTable::lookupKey(): key 0 is not allowed." << endl; | |
return NULL; | |
} | |
int firstBucket = index(k); | |
const unsigned int bitmap = table[firstBucket].bitField; | |
#if _DEBUG | |
// keep track of how many hops till found | |
_numHops = 0; | |
#endif | |
// keep probing to the right | |
// the item, if exists, must be in the neighborhood H | |
for (uint i = 0; i < H; i = nextPosInBucket(bitmap, i)){ | |
if (table[firstBucket + i].key == k) | |
return &table[firstBucket + i]; | |
#if _DEBUG | |
else | |
_numHops++; | |
#endif | |
} | |
return NULL; | |
} | |
virtual void removeKey(const Key& k){ | |
if (k == 0){ | |
cerr << "HopScotchHashTable::remove(): key 0 is not allowed." << endl; | |
return; | |
} | |
BucketType* pBucket = lookupKey(k); | |
if (pBucket){ | |
pBucket->key = 0; | |
_population--; | |
// update bitmap | |
int firstBucket = index(k); | |
int cur = (pBucket - table); | |
int positionInBucket = circularDiff(firstBucket, cur); | |
setBitmap(firstBucket, positionInBucket, 0); | |
} | |
} | |
public: | |
}; | |
} | |
} |