diff --git a/YASI_12/YASI_12.vcxproj.filters b/YASI_12/YASI_12.vcxproj.filters index cf648d9..5d40c3d 100644 --- a/YASI_12/YASI_12.vcxproj.filters +++ b/YASI_12/YASI_12.vcxproj.filters @@ -126,7 +126,7 @@ Header Files\Dictionary - Header Files + Header Files\Dictionary \ No newline at end of file diff --git a/YASI_12/ds.HopScotchHashTable.h b/YASI_12/ds.HopScotchHashTable.h index 7cecc7e..e0b9d22 100644 --- a/YASI_12/ds.HopScotchHashTable.h +++ b/YASI_12/ds.HopScotchHashTable.h @@ -8,23 +8,45 @@ using namespace std; template struct HopScotchEntry{ + typedef HopScotchEntry self; size_t key; Value value; int bitField; // map of bucket elements + HopScotchEntry() : key(0), bitField(0){} HopScotchEntry(const size_t& k) : key(k){} - HopScotchEntry(const size_t& k, const value& v) : key(k), value(v){} - HopScotchEntry(const size_t& k, const value& v, const int& bf) : - key(k), value(v),bitField(bf){} + HopScotchEntry(const size_t& k, const Value& v) : key(k), value(v){} + HopScotchEntry(const size_t& k, const Value& v, const int& bf) : + key(k), value(v), bitField(bf){} + HopScotchEntry(const self& other):key(other.key), value(other.value),bitField(other.bitField){ } + self& operator=(const self& other){ + key = other.key; value = other.value; bitField = other.bitField; + return *this; + } }; +// override for printing +template +std::ostream& operator<< (std::ostream& out, const HopScotchEntry& kv) { + out << kv.key; + return out; +} +template +std::ostream& operator<< (std::ostream& out, const HopScotchEntry* pKv) { + out << pKv->key; + return out; +} + + // a hash table with integer key template< class Value = size_t, - class HashFunction = IntHashFunction > + class HashFunction = IntHashFunction, + class EntryType = HopScotchEntry // must have fields `key', `value', and `bitField' +> class HopScotchHashTable : public IntLinearProbingHashTable < - Value, HashFunction, HopScotchEntry > { + Value, HashFunction, EntryType > { ///////////////// enable testing /////////////// friend class HopScotchHashTableTest; @@ -33,7 +55,6 @@ class HopScotchHashTable public: typedef size_t Key; - typedef KVPair EntryType; // must have a public member `key' typedef EntryType BucketType; // each bucket holds an EntryType object typedef EntryType Pair; // which is actually a key-value pair typedef EntryType* BucketEntryPtr; // ptr to an entry object @@ -41,33 +62,182 @@ class HopScotchHashTable typedef Value ValueType; protected: - typedef IntLinearProbingHashTable < Value, HashFunction > + typedef IntLinearProbingHashTable < Value, HashFunction, HopScotchEntry > base; + virtual void copyTable(BucketType* oldTable, const unsigned int oldSize) override{ - h::copyTable(oldTable, oldSize); + base::copyTable(oldTable, oldSize); + } + + // swap two bucket entries + void swap(const int bucketOne, const int bucketTwo) const{ + EntryType temp = table[bucketOne]; + table[bucketOne] = table[bucketTwo]; + table[bucketTwo] = temp; } //---------- HopScotch Properties/Methods ---------- - static unsigned int INIT_H = 32; - unsigned size_t H; // the neighborhood size + static const unsigned int INIT_H = 4; + size_t H; // the neighborhood size + size_t _zeroKeyBucket; // the bucket containing (hashed) zero key + // we should find something within first H-1 trials + // if not found, returns NULL + BucketType* lookupRight(const int bucket, const Key& key){ + + } +#if YASI_ALLOW_ZERO_KEY + virtual inline bool isNull(const int bucket) const{ + if (table[bucket].key != 0){ + // key non-zero + return false; + } + // key is zero + else if (_zeroUsed == false){ + // zero element not used + return true; + } + else{ + // zero element used + if (bucket == _zeroKeyBucket) + // this is the zeroElement, not NULL + return false; + else + return true; + } + } +#else + virtual inline bool isNull(const int bucket) const{ + return table[bucket].key == 0; + } +#endif // YASI_ALLOW_ZERO_KEY + // push this entry to the right - bool pushToRight(const int bucket){} - // fill this bucket with an entry from the left - void pullFromLeft(const int bucket){} + // returns false if it is not possible to push + // returns true on success + bool pushToRight(const int bucket, int startOfBumping = -1){ + if (startOfBumping > 0 ){ + // .... bucket startOfBumping .... + if (circularDiff(bucket, startOfBumping) == 1) { + // we traversed the entire array + // this operation is impossible + return false; + } + else{ + // continue working below + } + } + else{ + // initialize search + startOfBumping = bucket; + } + + bool found = false; + int cur = bucket; + + // try at most H-1 places to the right + int firstBucket = index(table[bucket].key); + // this is the invariant of hopscotch hashing + if (circularDiff(firstBucket, bucket) >= H){ + // the table is broken + cerr << "HopScotch invariant broken: item at bucket [" << bucket << "] is originally hashed at bucket [" << firstBucket << "], which is >= H, which is " << H << endl; + return false; + } + + int last = modSize(firstBucket + H-1); + if (bucket == last){ + // this bucket is not allowed to leave the neighborhood + return false; + } + int next; + for (; cur != last; cur = next){ + next = circularNext(cur); + if (next == startOfBumping) { + // tried all slots + // no way we can bump any more + return false; + } + + if ( isNull(next) ){ + // insert into blank slot + table[next] = table[bucket]; + return true; + } + } + // could not place the entry within neighborhood H + // bump the last guy in the neighborhood + // make room + if (pushToRight(last, startOfBumping)){ + table[last] = table[bucket]; + return true; + } + else{ + // the hopping failed + return false; + } + } + + virtual BucketType* insertKey(const Key& k){ + int firstBucket = index(k); + int cur = firstBucket; + int end = modSize(firstBucket + H); + while (!isNull(cur) && cur != end){ + cur = circularNext(cur); + } + if (cur == end){ + // could not insert within neighborhood + // bump the last guy + if (pushToRight( circularPrev(end) )){ + table[end].key = k; + _population++; + return &table[end]; + } + else{ + // could not insert + // need to expand table + return NULL; + } + } + else{ + // cur must be NULL + table[cur].key = k; + _population++; + return &table[end]; + } + } + + virtual BucketType* lookupKey(const Key& k){ + int firstBucket = index(k); + for (int i = 0; i < H; i++){ + if (table[firstBucket + i].key == k) + return &table[firstBucket + i]; + } + return NULL; + } + + virtual void removeKey(const Key& k){ + BucketType* pBucket = lookupKey(k); + if (pBucket){ + pBucket->key = 0; + pBucket->bitField = 0; + _population--; + } + } public: virtual ~HopScotchHashTable(){} - HopScotchHashTable(unsigned int logSize = INIT_LOGSIZE, unsigned int H = INIT_H) :H(H),IntLinearProbingHashTable(logSize){} + HopScotchHashTable(unsigned int logSize = INIT_LOGSIZE, unsigned int H = INIT_H) :H(H),IntLinearProbingHashTable(logSize){ + assert(_size >= H); + } }; ////////// test IntLinearProbingHashTable // inherit tests from base class, just redefine the types -class HopScotchHashTableTest +class HopScotchHashTableTest : public yasi::Test // : public LinearProbingHashTableTestBase< // HopScotchHashTable >, // HopScotchHashTable >, @@ -87,11 +257,112 @@ class HopScotchHashTableTest public: // inherit all public tests void pushToRight(){ - IntHashTable h(4, 4); // size 16, H=4 - h.table[2] = HopScotchEntry( 2, 2, 0 ); + IntHashTable h(3, 4); // size 8, H=4 + ASSERT_EQ(8, h.size()) << "size not 8"; + ASSERT_EQ(4, h.H) << "H not 4"; + for (int i = 0; i < h.size(); i++){ + ASSERT_EQ(0, h.table[i].key) << "key[" << i << "] not zero"; + } + + h.table[2] = BucketType(2, 2, 0); + /// - - 2 - - - - - + /// 0 1 2 3 4 5 6 7 + + bool ok; + int b, k; + { + SCOPED_TRACE("no bumps"); + /// - - 2 - - - - - + /// 0 1 2 3 4 5 6 7 + ok = h.pushToRight(2); h.makeNull(2); + /// - - - 2 - - - - + /// 0 1 2 3 4 5 6 7 + ASSERT_EQ(true, ok) << "push[2] failed"; + // [2] will be 2 because + b = 2; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 3; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // put it back + /// - - 2 - - - - - + /// 0 1 2 3 4 5 6 7 + h.swap(2, 3); + b = 2; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + + h.table[3] = BucketType(3, 3, 0); + h.table[4] = BucketType(4, 4, 0); + h.table[5] = BucketType(5, 5, 0); + /// - - 2 3 4 5 - - + /// 0 1 2 3 4 5 6 7 + ok = h.pushToRight(3); h.makeNull(3); + /// - - 2 - 4 5 3 - + /// 0 1 2 3 4 5 6 7 + ASSERT_EQ(true, ok) << "push[3] failed"; + b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 6; k = 3; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + } + { + SCOPED_TRACE("one bump"); + h.table[7] = BucketType(7, 7, 0); + /// - - 2 - 4 5 3 7 + /// 0 1 2 3 4 5 6 7 + // cout << "before push(4)" << h; + ok = h.pushToRight(4); h.makeNull(4); + //cout << "after push(4)" << h; + + /// 7 - 2 - - 5 3 4 + /// 0 1 2 3 4 5 6 7 + ASSERT_EQ(true, ok) << "push[4] failed"; + b = 4; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 7; k = 4; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 0; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + } + { + SCOPED_TRACE("two bumps"); + h.table[1] = BucketType(6, 7, 0); + h.table[3] = BucketType(1, 7, 0); + b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + /// 7 6 2 1 - 5 3 4 + /// 0 1 2 3 4 5 6 7 + ok = h.pushToRight(5); h.makeNull(5); + /// 5 6 7 1 2 - 3 4 + /// 0 1 2 3 4 5 6 7 + ASSERT_EQ(true, ok) << "push[5] failed"; + b = 5; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 0; k = 5; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 2; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + b = 4; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + } + { + SCOPED_TRACE("no more space"); + h.table[5] = BucketType(17); // just fill the place with something + /// 5 6 7 1 2 17 3 4 + /// 0 1 2 3 4 5 6 7 + ok = h.pushToRight(1); + ASSERT_EQ(false, ok) << "push[1] succeeded"; + + } + } + + void pushToRightOverZeroKey(){ + + } + void circularDiff(){ + IntHashTable8 h(3,4); + ASSERT_EQ(8, h.size()); + ASSERT_EQ(0, h.circularDiff(4, 4)); + ASSERT_EQ(1, h.circularDiff(4, 5)); + ASSERT_EQ(3, h.circularDiff(4, 7)); + ASSERT_EQ(4, h.circularDiff(4, 0)); + ASSERT_EQ(6, h.circularDiff(4, 2)); + ASSERT_EQ(7, h.circularDiff(4, 3)); } }; +ADD_TEST_F(HopScotchHashTableTest, circularDiff); +ADD_TEST_F(HopScotchHashTableTest, pushToRight); //ADD_TEST_F(HopScotchHashTableTest, insert); //ADD_TEST_F(HopScotchHashTableTest, copyTable); //ADD_TEST_F(HopScotchHashTableTest, growCondition); diff --git a/YASI_12/ds.IntLinearProbingHashTable.h b/YASI_12/ds.IntLinearProbingHashTable.h index 84fb1bd..7d0f1ff 100644 --- a/YASI_12/ds.IntLinearProbingHashTable.h +++ b/YASI_12/ds.IntLinearProbingHashTable.h @@ -22,7 +22,7 @@ namespace yasi{ public: typedef size_t Key; - typedef KVPair EntryType; // must have a public member `key' + //typedef KVPair EntryType; // must have a public member `key' typedef EntryType BucketType; // each bucket holds an EntryType object typedef EntryType Pair; // which is actually a key-value pair typedef EntryType* BucketEntryPtr; // ptr to an entry object @@ -30,7 +30,7 @@ namespace yasi{ typedef Value ValueType; protected: - typedef LinearProbingHashTable < size_t, Value, KVPairSimple, HashFunction > + typedef LinearProbingHashTable < size_t, Value, HashFunction, BucketType > base; diff --git a/YASI_12/ds.LinearProbingHashTable.h b/YASI_12/ds.LinearProbingHashTable.h index 7952fc2..c199725 100644 --- a/YASI_12/ds.LinearProbingHashTable.h +++ b/YASI_12/ds.LinearProbingHashTable.h @@ -66,7 +66,7 @@ namespace ds{ class LinearProbingHashTable : public HashTableBase < Key, Value, - KVPair, // storing objects, not pointers + EntryType, // storing objects, not pointers HashFunction > { ///////////////// enable testing /////////////////// template @@ -97,7 +97,13 @@ namespace ds{ inline unsigned int circularPrev(const int index) const{ return modSize(index - 1); } - + inline unsigned int circularDiff(const int low, const int high) const{ + if (low == high) return 0; + else if (low < high) + return high - low; + else + return _size - low + high; + } // if a key is already there, it is updated void insert(const Key& k, const Value& v){ @@ -214,6 +220,98 @@ namespace ds{ } } + virtual void removeKey(const Key& k) { + // zero key + if (isKeyZero(&k)){ + if (_zeroUsed) { + _zeroUsed = false; + + _population--; + if (needShrink(_population)) + shrink(); + } + } + else{ + // non-zero key + Pred keyEquals; + unsigned int curFirstBucket = index(k); + int cur = curFirstBucket; + const int searchStart = curFirstBucket; // remember our first posti + do{ + //Pair* pEntry = table[cur]; + + if (isNull(cur)){ + // this slot must be empty + // because we started from the firstBucket, + // the key is not present in the table + return; + } + else{ + // this slot is occupied; check key + if (keyEquals(k, key(cur))){ + // remove + removeEntry(cur); + _population--; + if (needShrink(_population)) + shrink(); + else{ + // shuffle the entries from right to left until an empty slot is found + // (there must be one because we just deleted one) + // this will fix all other's linear probing sequence + const unsigned int startBucket = cur; + //bool crossedBoundary = false; // search crossed the table end and now at the beginning of the table due to mod operation + unsigned int neighbor = circularNext(cur); + while (neighbor != searchStart && // we have not checked all buckets + !isNull(neighbor))// there is an entry at the neighboring bucket and + { + //if (!crossedBoundary && neighbor < cur) { + // // our search just wrapped across the table boundary + // crossedBoundary = true; + //} + + unsigned int neighborFirstBucket = index(key(neighbor)); + if (neighborFirstBucket == neighbor || // is the neighbor at its own first bucket? then it should not move + ( + (curFirstBucket <= cur) // search did not wrap around the end + ? neighborFirstBucket > cur // skip if neighbor's first bucket to the right of cur + : neighborFirstBucket < cur // skip if neighbor's first bucket to the left of cur + ) + ) + { + // yes; skip it + neighbor = circularNext(neighbor); + } + else{ + // the (possibly distant) neighbor is not at its first bucket + + // move it to the left + table[cur] = table[neighbor]; + makeNull(neighbor); + // prepare for the next hop + cur = neighbor; + neighbor = circularNext(neighbor); + curFirstBucket = neighborFirstBucket; + } + } + } + // done + return; + } + else{ + // key didn't match + // move on to the next slot + cur = modSize(cur + 1); + } + } // table[cur] != NULL; go to next iteration of while loop + } while (cur != searchStart); + // we checked all slots of the table; no key match + // cannot remove; done + } + return; + + + } + // forecefully provide a table (and associated state values) // for test purpose only @@ -336,97 +434,7 @@ namespace ds{ virtual bool contains(const Key& key) const override { return lookupKey(key) != NULL; } - virtual void remove(const Key& k) override { - // zero key - if (isKeyZero(&k)){ - if (_zeroUsed) { - _zeroUsed = false; - - _population--; - if (needShrink(_population)) - shrink(); - } - } - else{ - // non-zero key - Pred keyEquals; - unsigned int curFirstBucket = index(k); - int cur = curFirstBucket; - const int searchStart = curFirstBucket; // remember our first posti - do{ - //Pair* pEntry = table[cur]; - - if (isNull(cur)){ - // this slot must be empty - // because we started from the firstBucket, - // the key is not present in the table - return; - } - else{ - // this slot is occupied; check key - if (keyEquals(k, key(cur))){ - // remove - removeEntry(cur); - _population--; - if (needShrink(_population)) - shrink(); - else{ - // shuffle the entries from right to left until an empty slot is found - // (there must be one because we just deleted one) - // this will fix all other's linear probing sequence - const unsigned int startBucket = cur; - //bool crossedBoundary = false; // search crossed the table end and now at the beginning of the table due to mod operation - unsigned int neighbor = circularNext(cur); - while (neighbor != searchStart && // we have not checked all buckets - !isNull(neighbor) )// there is an entry at the neighboring bucket and - { - //if (!crossedBoundary && neighbor < cur) { - // // our search just wrapped across the table boundary - // crossedBoundary = true; - //} - - unsigned int neighborFirstBucket = index(key(neighbor)); - if (neighborFirstBucket == neighbor || // is the neighbor at its own first bucket? then it should not move - ( - (curFirstBucket <= cur) // search did not wrap around the end - ? neighborFirstBucket > cur // skip if neighbor's first bucket to the right of cur - : neighborFirstBucket < cur // skip if neighbor's first bucket to the left of cur - ) - ) - { - // yes; skip it - neighbor = circularNext(neighbor); - } - else{ - // the (possibly distant) neighbor is not at its first bucket - - // move it to the left - table[cur] = table[neighbor]; - makeNull(neighbor); - // prepare for the next hop - cur = neighbor; - neighbor = circularNext(neighbor); - curFirstBucket = neighborFirstBucket; - } - } - } - // done - return; - } - else{ - // key didn't match - // move on to the next slot - cur = modSize(cur + 1); - } - } // table[cur] != NULL; go to next iteration of while loop - } while (cur != searchStart); - // we checked all slots of the table; no key match - // cannot remove; done - } - return; - - - } + virtual void remove(const Key& k) override { removeKey(k); } }; diff --git a/YASI_12/main.cpp b/YASI_12/main.cpp index 114dcbe..18dae64 100644 --- a/YASI_12/main.cpp +++ b/YASI_12/main.cpp @@ -10,6 +10,7 @@ #include "ds.separatechaininghashtable.h" #include "ds.intlinearprobinghashtable.h" #include "ds.linearprobinghashtable.h" +#include "ds.hopscotchhashtable.h" //#include "Sorter.h" #include