From a0ab5017bd2a973f22f9c75c9d938f4ec08e7f8b Mon Sep 17 00:00:00 2001 From: saad0105050 Date: Sun, 21 Sep 2014 03:25:16 -0400 Subject: [PATCH] Fixed HopScotchHashTable. Todo: use bitmap to navigate through items in the same bucket. Signed-off-by: saad0105050 --- YASI_12/common.h | 2 + YASI_12/ds.HopScotchHashTable.h | 735 ++++++++++++++++++++++++---- YASI_12/ds.LinearProbingHashTable.h | 36 +- YASI_12/main.cpp | 3 +- YASI_12/utils.h | 30 +- 5 files changed, 690 insertions(+), 116 deletions(-) diff --git a/YASI_12/common.h b/YASI_12/common.h index 4d31bef..d65b01f 100644 --- a/YASI_12/common.h +++ b/YASI_12/common.h @@ -30,4 +30,6 @@ namespace yasi{ #define DELETE_ARR_SAFE(x) {if( (x) != 0 ) delete[] (x); (x) = 0; } #define ARR_LENGTH(x) ( sizeof( (x) )/sizeof( (x)[0] ) ) +#define IS_POW_OF_TWO(x) ( ((x) >= 1) && ( ((x) & ((x)-1)) == 0) ) + } // namespace yasi \ No newline at end of file diff --git a/YASI_12/ds.HopScotchHashTable.h b/YASI_12/ds.HopScotchHashTable.h index b1ab8c7..aeae980 100644 --- a/YASI_12/ds.HopScotchHashTable.h +++ b/YASI_12/ds.HopScotchHashTable.h @@ -70,6 +70,7 @@ class HopScotchHashTable //---------- HopScotch Properties/Methods ---------- static const unsigned int INIT_H = 4; size_t H; // the neighborhood size + size_t logH; // log of H size_t _zeroKeyBucket; // the bucket containing (hashed) zero key #if _DEBUG int _numHops; @@ -81,6 +82,8 @@ class HopScotchHashTable virtual ~HopScotchHashTable(){} HopScotchHashTable(unsigned int logSize = INIT_LOGSIZE, unsigned int H = INIT_H) :H(H), IntLinearProbingHashTable(logSize){ assert(_size >= H); + assert(IS_POW_OF_TWO(H) == true); + logH = whichPowerOfTwo(H); #if _DEBUG _numHops = 0; #endif @@ -193,38 +196,161 @@ class HopScotchHashTable } } + bool canSwapCurWithEmpty(const int firstBucket, const int cur, const int hashCur, const int emptySlot){ + // check boundaries + if (firstBucket == emptySlot || + firstBucket == cur || + cur == emptySlot) { + return false; + } + // in forward direction + // current cannot go to the left of firstBucket or right of emptySlot + if (firstBucket < emptySlot && // non-circular direction + (cur <= firstBucket || cur >= emptySlot) ){ + return false; + } + + // in circular direction + // current cannot go to the left of firstBucket or right of emptySlot + if (firstBucket > emptySlot && // circular direction + (cur <= firstBucket && cur >= emptySlot)){ + return false; + } + + bool hashCurIsAtRightOfFirstBucket = false; + bool hashCurIsAtLeftOfEmptySlot = false; + bool emptySlotInNeighborhoodOfHashCur = false; + int diffHashCurToEmptySlot = 0; + // regular or circular search? + if (firstBucket < emptySlot){ + // regular search + diffHashCurToEmptySlot = emptySlot - hashCur; // circularDiff(hashCur, emptySlot); + + hashCurIsAtLeftOfEmptySlot = diffHashCurToEmptySlot > 0; + hashCurIsAtRightOfFirstBucket = hashCur - firstBucket > 0; + emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < H; + } + else{ + // firstBucket >= emptySlot + if (firstBucket == emptySlot) return NULL; + + // circular search + diffHashCurToEmptySlot = circularDiff(hashCur, emptySlot); + + if (hashCur > firstBucket){ + hashCurIsAtRightOfFirstBucket = true; + hashCurIsAtLeftOfEmptySlot = true; + emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < H; + } + else{ + if (hashCur == firstBucket) return NULL; + + // hashCur < firstBucket + if (hashCur < emptySlot){ + hashCurIsAtRightOfFirstBucket = true; + hashCurIsAtLeftOfEmptySlot = true; + emptySlotInNeighborhoodOfHashCur = diffHashCurToEmptySlot < H; + } + else{ + // hashCur >= emptySlot + return NULL; + } + } + } + + return hashCurIsAtRightOfFirstBucket && + hashCurIsAtLeftOfEmptySlot && + emptySlotInNeighborhoodOfHashCur + ; + } + + // repeatedly push this empty slot to the left until it comes within + // the neighborhood of firstBucket + // returns the ptr to the slot on success + // returns NULL if the pull is unsuccessful + BucketType* pullFromLeft(const int firstBucket, const int emptySlot){ + //when to stop + int remainingSlots = circularDiff(firstBucket, emptySlot); + if (remainingSlots < H){ + // the empty slot is wihing the neighborhood of firstBucket + return &table[emptySlot]; + } + // H + // <--------------> + // i - - - j - - - [] - - - - + // #j + // + // i is the firstBucket + // try to find an item j to the left of emptySlot + // which is withing H-1 distable from emptySlot, and + // whose hashed index #j is within H-1 distance from emptySlot + // and to the right of i + + // look in the H-1 slots to the left of emptySlot + int leftLimit = modSize(emptySlot - H + 1); + for (int cur = leftLimit; cur != emptySlot; cur = circularNext(cur)){ + size_t curKey = table[cur].key; + if (curKey == 0) continue; + int hashCur = index(curKey); + + if (canSwapCurWithEmpty(firstBucket, cur, hashCur, emptySlot)) + { + // candidate for swap + swap(cur, emptySlot); + // repeat + return pullFromLeft(firstBucket, cur); + } + } + // no candidate for swap found + return NULL; + } + virtual BucketType* insertKey(const Key& k){ if (k == 0){ cerr << "HopScotchHashTable::insertKey(): key 0 is not allowed." << endl; return NULL; } + // check whether we need to grow for this new item + if (needGrow(_population + 1)) + grow(); int firstBucket = index(k); int cur = firstBucket; - int end = modSize(firstBucket + H); - while (!isNull(cur) && cur != end){ + while (!isNull(cur) ){ cur = circularNext(cur); + if (cur == firstBucket) {// we tried all buckets + grow(); + // time to grow + return insertKey(k); + } + } + + // found an empty slot + if (circularDiff(firstBucket, cur) < H){ + // empth slot within the neighborhood of firstBucket + table[cur].key = k; + _population++; + return &table[cur]; } - if (cur == end){ - // could not insert within neighborhood - // bump the last guy - if (pushToRight( circularPrev(end) )){ - table[end].key = k; + else{ + // empty slot out of the immediate neighborhood + // cur is the empty bucket + // pull it within the immediate neighborhood + BucketType* pBucket = pullFromLeft(firstBucket, cur); + if (pBucket){ + // moved the empty slot to the immediate neighborhood + table[cur].key = k; _population++; - return &table[end]; + return &table[cur]; } else{ - // could not insert - // need to expand table - return NULL; + // could not bring the NULL element back + // time to resize and rehash + grow(); + return insertKey(k); } } - else{ - // cur must be NULL - table[cur].key = k; - _population++; - return &table[end]; - } + } virtual BucketType* lookupKey(const Key& k){ @@ -239,7 +365,8 @@ class HopScotchHashTable _numHops = 0; #endif // keep probing to the right - for (int i = 0; ; i = circularNext(i) ){ + // the item, if exists, must be in the neighborhood H + for (int i = 0; i < H; i = circularNext(i) ){ if (table[firstBucket + i].key == k) return &table[firstBucket + i]; #if _DEBUG @@ -282,100 +409,465 @@ class HopScotchHashTableTest : public yasi::Test typedef HopScotchHashTable > IntHashTable8; typedef HopScotchHashTable > IntHashTable16; typedef IntHashTable16 IntHashTable; // default - typedef IntLinearProbingHashTable > IntHashTable32; + typedef HopScotchHashTable > IntHashTable32; typedef IntHashTable::BucketType BucketType; typedef IntHashTable::BucketEntryPtr BucketEntryPtr; typedef IntHashTable::Pair Pair; public: // inherit all public tests - void pushToRight(){ - IntHashTable h(3, 4); // size 8, H=4 - ASSERT_EQ(8, h.size()) << "size not 8"; - ASSERT_EQ(4, h.H) << "H not 4"; - for (int i = 0; i < h.size(); i++){ - ASSERT_EQ(0, h.table[i].key) << "key[" << i << "] not zero"; + + //void pushToRight(){ + // IntHashTable h(3, 4); // size 8, H=4 + // ASSERT_EQ(8, h.size()) << "size not 8"; + // ASSERT_EQ(4, h.H) << "H not 4"; + // for (int i = 0; i < h.size(); i++){ + // ASSERT_EQ(0, h.table[i].key) << "key[" << i << "] not zero"; + // } + + // h.table[2] = BucketType(2, 2, 0); + // /// - - 2 - - - - - + // /// 0 1 2 3 4 5 6 7 + + // bool ok; + // int b, k; + // { + // SCOPED_TRACE("no bumps"); + // /// - - 2 - - - - - + // /// 0 1 2 3 4 5 6 7 + // ok = h.pushToRight(2); h.makeNull(2); + // /// - - - 2 - - - - + // /// 0 1 2 3 4 5 6 7 + // ASSERT_EQ(true, ok) << "push[2] failed"; + // // [2] will be 2 because + // b = 2; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 3; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // // put it back + // /// - - 2 - - - - - + // /// 0 1 2 3 4 5 6 7 + // h.swap(2, 3); + // b = 2; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + + // h.table[3] = BucketType(3, 3, 0); + // h.table[4] = BucketType(4, 4, 0); + // h.table[5] = BucketType(5, 5, 0); + // /// - - 2 3 4 5 - - + // /// 0 1 2 3 4 5 6 7 + // ok = h.pushToRight(3); h.makeNull(3); + // /// - - 2 - 4 5 3 - + // /// 0 1 2 3 4 5 6 7 + // ASSERT_EQ(true, ok) << "push[3] failed"; + // b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 6; k = 3; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // } + // { + // SCOPED_TRACE("one bump"); + // h.table[7] = BucketType(7, 7, 0); + // /// - - 2 - 4 5 3 7 + // /// 0 1 2 3 4 5 6 7 + // // cout << "before push(4)" << h; + // ok = h.pushToRight(4); h.makeNull(4); + // //cout << "after push(4)" << h; + + // /// 7 - 2 - - 5 3 4 + // /// 0 1 2 3 4 5 6 7 + // ASSERT_EQ(true, ok) << "push[4] failed"; + // b = 4; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 7; k = 4; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 0; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // } + // { + // SCOPED_TRACE("two bumps"); + // h.table[1] = BucketType(6, 7, 0); + // h.table[3] = BucketType(1, 7, 0); + // b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // /// 7 6 2 1 - 5 3 4 + // /// 0 1 2 3 4 5 6 7 + // ok = h.pushToRight(5); h.makeNull(5); + // /// 5 6 7 1 2 - 3 4 + // /// 0 1 2 3 4 5 6 7 + // ASSERT_EQ(true, ok) << "push[5] failed"; + // b = 5; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 0; k = 5; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 2; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // b = 4; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + // } + // { + // SCOPED_TRACE("no more space"); + // h.table[5] = BucketType(17); // just fill the place with something + // /// 5 6 7 1 2 17 3 4 + // /// 0 1 2 3 4 5 6 7 + // ok = h.pushToRight(1); + // ASSERT_EQ(false, ok) << "push[1] succeeded"; + + // } + //} + + // tests whether it is ok to move emptySlot to cur for a given firstBucket + // hash(cur) should lie between firstBucket and emptySlot, and + // hash(cur) should be withing H-1 of emptySlot + void canSwapCurWithEmpty(){ + IntHashTable h(4, 4); // size 16, H=4 + int f, c, hc, e; + { + string str = "non-circular. boundary involving f,c,e"; + SCOPED_TRACE(str); + f = 4, c = 5, hc = 5, e = 4; + + // f == e + f = e; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + f = 4; + + // f == c + c = f; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c < f + c = f - 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c = e + c = e; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c > e + c = e + 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + } + { + string str = "circular. boundary involving f,c,e"; + SCOPED_TRACE(str); + f = 14, c = 14, hc = 15, e = 1; + + // f == c + c = f; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c < f + c = f - 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c == e + c = e; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // c > e + c = e + 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; } - h.table[2] = BucketType(2, 2, 0); - /// - - 2 - - - - - - /// 0 1 2 3 4 5 6 7 + { + string str = "non-circular. hash(cur) not in range"; + SCOPED_TRACE(str); + f = 4, c = 5, e = 8; + + // hc < f + hc = f - 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == f + hc = f; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == e + hc = e; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc > e + hc = e + 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc < e - H + hc = e - h.H - 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == e - H + hc = e - h.H; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; - bool ok; - int b, k; + } { - SCOPED_TRACE("no bumps"); - /// - - 2 - - - - - - /// 0 1 2 3 4 5 6 7 - ok = h.pushToRight(2); h.makeNull(2); - /// - - - 2 - - - - - /// 0 1 2 3 4 5 6 7 - ASSERT_EQ(true, ok) << "push[2] failed"; - // [2] will be 2 because - b = 2; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 3; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - // put it back - /// - - 2 - - - - - - /// 0 1 2 3 4 5 6 7 - h.swap(2, 3); - b = 2; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - - h.table[3] = BucketType(3, 3, 0); - h.table[4] = BucketType(4, 4, 0); - h.table[5] = BucketType(5, 5, 0); - /// - - 2 3 4 5 - - - /// 0 1 2 3 4 5 6 7 - ok = h.pushToRight(3); h.makeNull(3); - /// - - 2 - 4 5 3 - - /// 0 1 2 3 4 5 6 7 - ASSERT_EQ(true, ok) << "push[3] failed"; - b = 3; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 6; k = 3; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + string str = "circular. hash(cur) not in range"; + SCOPED_TRACE(str); + f = 12, c = 2, e = 3; + + // hc < f + hc = f - 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == f + hc = 4; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == e + hc = e; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc > e + hc = e + 1; + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc < e - H + hc = h.modSize(e - h.H - 1); + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // hc == e - H + hc = h.modSize(e - h.H); + ASSERT_EQ(false, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + } { - SCOPED_TRACE("one bump"); - h.table[7] = BucketType(7, 7, 0); - /// - - 2 - 4 5 3 7 - /// 0 1 2 3 4 5 6 7 - // cout << "before push(4)" << h; - ok = h.pushToRight(4); h.makeNull(4); - //cout << "after push(4)" << h; - - /// 7 - 2 - - 5 3 4 - /// 0 1 2 3 4 5 6 7 - ASSERT_EQ(true, ok) << "push[4] failed"; - b = 4; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 7; k = 4; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 0; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + string str = "hash(cur) in range"; + SCOPED_TRACE(str); + + // non-circular + f = 4, c = 8, e = 10; + + // hc > e-H + hc = e - h.H + 1; + ASSERT_EQ(true, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; + + // circular + f = 11, c = 15, e = 2; + + // hc > e-H + hc = h.modSize(e - h.H + 1); + ASSERT_EQ(true, h.canSwapCurWithEmpty(f, c, hc, e)) + << "canSwapCurWithEmpty(" << f << ", " << c << ", " << hc << ", " << e << ")"; } + + } + + void pullFromLeft(){ + IntHashTable h(4, 4); // size 16, H=4 + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + int k, b, f; + BucketType* p; + + // test scenarios: + // - empty slot is within H-1 of firstBucket + // - the replacement item j is found, which is within H-1 of firstBucket + // - the replacement item j is found, which is not within H-1 of firstBucket + // - the replacement item j could not be found within H-1 of emptySlot + // - above, circular { - SCOPED_TRACE("two bumps"); - h.table[1] = BucketType(6, 7, 0); - h.table[3] = BucketType(1, 7, 0); - b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - /// 7 6 2 1 - 5 3 4 - /// 0 1 2 3 4 5 6 7 - ok = h.pushToRight(5); h.makeNull(5); - /// 5 6 7 1 2 - 3 4 - /// 0 1 2 3 4 5 6 7 - ASSERT_EQ(true, ok) << "push[5] failed"; - b = 5; k = 0; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 0; k = 5; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 1; k = 6; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 2; k = 7; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 3; k = 1; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; - b = 4; k = 2; ASSERT_EQ(k, h.table[b].key) << "[" << b << "] not " << k; + SCOPED_TRACE("case #1"); + h.insert(4, 4); + h.insert(5, 5); + h.insert(6, 6); + ASSERT_EQ(3, h.population()); + + // 20 is hashed in 4, should be placed in 7 + k = 20; b = 7; + ASSERT_EQ(true, h.isNull(b)); + p = h.pullFromLeft(4, b); + ASSERT_NE(NULL, (int)p); + ASSERT_EQ(&h.table[b], p); + h.table[b] = BucketType(k, k, 0); + + // circular + h.clear(); + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + h.insert(14, 14); + h.insert(15, 15); + h.insert(31, 31); // hash value 15 + // 17 is hashed in 4, should be placed in 7 + k = 17; b = 1; + ASSERT_EQ(true, h.isNull(b)); + p = h.pullFromLeft(14, b); + ASSERT_NE(NULL, (int)p); // already within H + ASSERT_EQ(&h.table[b], p); + + } + { + SCOPED_TRACE("the replacement item j is found, which is within H-1 of firstBucket"); + h.clear(); + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + h.table[3] = BucketType(3); + h.table[4] = BucketType(4); + h.table[5] = BucketType(21); // hash value = 6 + h.table[6] = BucketType(22); // hash value = 5 + h.table[7] = BucketType(19); // hash value = 3 + + cout << "before pulling 8\n" << h; + p = h.pullFromLeft(3, 8); // empty slot should go to 5 + cout << "after pulling 8\n" << h; + ASSERT_EQ(&h.table[5], p); + ASSERT_EQ(3, h.table[3].key); + ASSERT_EQ(4, h.table[4].key); + ASSERT_EQ(0, h.table[5].key); + ASSERT_EQ(22, h.table[6].key); + ASSERT_EQ(19, h.table[7].key); + ASSERT_EQ(21, h.table[8].key); + + // now try a case where key hashes prohibit their move + h.clear(); + h.table[3] = BucketType(3); + h.table[4] = BucketType(4); + h.table[5] = BucketType(18); // hash value = 2 + h.table[6] = BucketType(17); // hash value = 1 + h.table[7] = BucketType(21); // hash value = 5 + + cout << "before pulling 8\n" << h; + p = h.pullFromLeft(3, 8); // empty slot should go to 4 + cout << "after pulling 8\n" << h; + ASSERT_EQ(&h.table[4], p); + ASSERT_EQ(3, h.table[3].key); + ASSERT_EQ(0, h.table[4].key); + ASSERT_EQ(18, h.table[5].key); + ASSERT_EQ(17, h.table[6].key); + ASSERT_EQ(4, h.table[7].key); + ASSERT_EQ(21, h.table[8].key); + + /// circular + h.clear(); + cout << "after clear\n" << h; + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + k = 14; h.table[k] = BucketType(k); + k = 15; h.table[k] = BucketType(k); + k = 0; h.table[k] = BucketType(159); // hash value 15 + k = 1; h.table[k] = BucketType(30); // hash value 14 + k = 2; h.table[k] = BucketType(k); + k = 3; h.table[k] = BucketType(k); + // for firstBucket 15, empty slot [4] should move to [2] + // because [1]'s hash value is 14 + b = 2; f = 15; + cout << "before moving from [4]\n" << h; + ASSERT_EQ(true, h.isNull(4)); + p = h.pullFromLeft(f, 4); + cout << "after moving from [4]\n" << h; + ASSERT_EQ(&h.table[2], p); + ASSERT_EQ(14, h.table[14].key); + ASSERT_EQ(15, h.table[15].key); + ASSERT_EQ(159, h.table[0].key); + ASSERT_EQ(30, h.table[1].key); + ASSERT_EQ(true, h.isNull(2)); + ASSERT_EQ(3, h.table[3].key); + ASSERT_EQ(2, h.table[4].key); } { - SCOPED_TRACE("no more space"); - h.table[5] = BucketType(17); // just fill the place with something - /// 5 6 7 1 2 17 3 4 - /// 0 1 2 3 4 5 6 7 - ok = h.pushToRight(1); - ASSERT_EQ(false, ok) << "push[1] succeeded"; + SCOPED_TRACE("the replacement item j is found, which is NOT within H-1 of firstBucket"); + h.clear(); + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + h.table[0] = BucketType(31); // hash to 15 + h.table[1] = BucketType(1); + h.table[2] = BucketType(17); // hash 1 + h.table[3] = BucketType(33); // hash 1 + for (int i = 4; i < 16; i++){ + h.table[i] = BucketType(i); + } + h.table[13] = BucketType(0); + /// keys: 31 1 17 33 4 5 6 7 8 9 10 11 12 0 14 15 + /// index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + cout << "before moving from [13]\n" << h; + p = h.pullFromLeft(1, 13); // empty slot should go to 4 + cout << "after moving from [13]\n" << h; + /// keys: 31 1 17 33 0 5 6 4 8 9 7 11 12 10 14 15 + /// index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + for (int i = 1; i < 16; i++){ + if (i == 4) + ASSERT_EQ(true, h.isNull(i)); + else if (i == 13) + ASSERT_EQ(10, h.table[i].key); + else if (i == 10) + ASSERT_EQ(7, h.table[i].key); + else if (i == 7) + ASSERT_EQ(4, h.table[i].key); + else if (i == 2) + ASSERT_EQ(17, h.table[i].key); + else if (i == 3) + ASSERT_EQ(33, h.table[i].key); + else + ASSERT_EQ(i, h.table[i].key); + } + + /// circular + h.clear(); + f = 4; + + for (int i = 0; i < 16; i++) h.table[i] = BucketType(i); + h.table[3].key = 0; // make [3] null + h.table[0].key = 0; // make [0] null + h.table[15].key = 17; // make [15].key hash to 1, cannot move + h.table[13].key = 0; // make [13] null + h.table[10].key = 25; // make [10].key hash to 25 + h.table[11].key = 0; // make [11] null + h.table[9].key = 19; // make [9].key hash to 3, cannot move + h.table[5].key = 2; // make [5].key hash to 2, cannot move + + /// keys: 0 1 2 0 4 2 6 7 8 19 25 0 12 0 14 17 + /// index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + cout << "before moving from [3]\n" << h; + p = h.pullFromLeft(4, 3); // empty slot should settle at [7] + cout << "after moving from [3]\n" << h; + /// keys: 0 14 2 1 4 2 6 0 8 19 7 0 25 0 12 17 + /// index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + ASSERT_EQ(&h.table[7], p); + ASSERT_EQ(1, h.table[3].key); // first swap with 1 + ASSERT_EQ(14, h.table[1].key); // then swap with 14 + ASSERT_EQ(12, h.table[14].key); // then swap with 12 + ASSERT_EQ(25, h.table[12].key); // then swap with 10 + ASSERT_EQ(7, h.table[10].key); // then swap with 7 + ASSERT_EQ(0, h.table[7].key); // 7 is now empty slot + } + { + SCOPED_TRACE("the replacement item j could not be found within H-1 of emptySlot"); + h.clear(); + ASSERT_EQ(16, h.size()); + ASSERT_EQ(0, h.population()); + ASSERT_EQ(4, h.H); + + for (int i = 0; i < 15; i++){ + h.table[i] = BucketType(i * 16 + 2 ); // all hash to 2 + } + p = h.pullFromLeft(3, 9); // should fail + ASSERT_EQ(NULL, (int)p); + p = h.pullFromLeft(8, 2); // should fail + ASSERT_EQ(NULL, (int)p); } } @@ -394,7 +886,7 @@ class HopScotchHashTableTest : public yasi::Test IntHashTable h(4, 4); // size 16, H=4 ASSERT_EQ(16, h.size()); ASSERT_EQ(0, h.population()); - ASSERT_EQ(6, h.H); + ASSERT_EQ(4, h.H); // try to find an empty key ASSERT_EQ(NULL, (int)h.lookupKey(7)); @@ -403,24 +895,63 @@ class HopScotchHashTableTest : public yasi::Test // insert one key, find it, and remove it h.put(5, 50); ASSERT_EQ(true, h.contains(5)); - ASSERT_EQ(5, *h.lookupKey(5)); + ASSERT_EQ(5, h.lookupKey(5)->key); ASSERT_EQ(50, *h.get(5)); ASSERT_EQ(1, h.population()); - ASSERT_EQ(16, h.population()); + ASSERT_EQ(16, h.size()); h.remove(5); ASSERT_EQ(false, h.contains(5)); ASSERT_EQ(NULL, (int)h.lookupKey(5)); ASSERT_EQ(NULL, (int)h.get(5)); ASSERT_EQ(0, h.population()); - ASSERT_EQ(16, h.population()); + ASSERT_EQ(16, h.size()); - // insert many keys that + // insert many keys that should not cause resize + h.clear(); + ASSERT_EQ(16, h.size()); + int numItemsWithoutResize = h.maxPopulationWithoutGrow(); + for (int i = 0; i < numItemsWithoutResize; i++){ + h.insert(16+i, 16+i); + } + ASSERT_EQ(16, h.size()); + ASSERT_EQ(numItemsWithoutResize, h.population()); + + // test remove + ASSERT_EQ(true, h.contains(21)); + h.remove(21); + ASSERT_EQ(false, h.contains(21)); + ASSERT_EQ(16, h.size()); + ASSERT_EQ(numItemsWithoutResize - 1, h.population()); + // put it back + h.insert(21, 21); + + // now add one more item, should cause grow() + h.insert(15, 15); + ASSERT_EQ(true, h.contains(15)); + ASSERT_EQ(32, h.size()); + ASSERT_EQ(numItemsWithoutResize + 1, h.population()); + + ///////// test resizing when a bucket gets more than H items + IntHashTable32 h2(4, 4); // keep the hash func mod 32 so that we can grow and rehash + for (int i = 0; i < h2.H; i++){ + h2.insert(16 * i + 2, i); // all hashed to 2 + } + ASSERT_EQ(16, h2.size()); + ASSERT_EQ(h2.H, h2.population()); + // this insert should cause grow + cout << "before inserting " << 16 * h2.H + 2 << endl << h2; + h2.insert(16 * h2.H + 2, h2.H); + cout << "after inserting " << 16 * h2.H + 2 << endl << h2; + ASSERT_EQ(32, h2.size()); + ASSERT_EQ(h2.H+1, h2.population()); } }; ADD_TEST_F(HopScotchHashTableTest, circularDiff); -ADD_TEST_F(HopScotchHashTableTest, pushToRight); -//ADD_TEST_F(HopScotchHashTableTest, insert); +ADD_TEST_F(HopScotchHashTableTest, canSwapCurWithEmpty); +ADD_TEST_F(HopScotchHashTableTest, pullFromLeft); +ADD_TEST_F(HopScotchHashTableTest, insertRemoveLookup); + //ADD_TEST_F(HopScotchHashTableTest, copyTable); //ADD_TEST_F(HopScotchHashTableTest, growCondition); //ADD_TEST_F(HopScotchHashTableTest, shrinkCondition); diff --git a/YASI_12/ds.LinearProbingHashTable.h b/YASI_12/ds.LinearProbingHashTable.h index c199725..fcecaa5 100644 --- a/YASI_12/ds.LinearProbingHashTable.h +++ b/YASI_12/ds.LinearProbingHashTable.h @@ -104,6 +104,18 @@ namespace ds{ else return _size - low + high; } + inline unsigned bool circularBetweenInclusive(const int bucket, const int left, const int right) const{ + if (left == right && bucket != left) return false; + if (left < right){ + return left <= bucket && bucket <= right; + } + else{ + // left > right + if (bucket >= left) return true; + else if (bucket <= right) return true; + else return false; + } + } // if a key is already there, it is updated void insert(const Key& k, const Value& v){ @@ -395,15 +407,15 @@ namespace ds{ } - void clear(){ - //if (table){ - // for (unsigned int i = 0; i < _size; i++){ - // removeEntry(i); - // } - //} - // DELETE_ARR_SAFE(table); + virtual void clear(){ + if (table){ + for (unsigned int i = 0; i < _size; i++){ + if(!isNull(i)) + removeEntry(i); + } + } _zeroUsed = false; - _size = _population = _logSize = 0; + _population = 0; } public: @@ -422,10 +434,10 @@ namespace ds{ DELETE_SAFE(_pZeroValue); } virtual Value* get(const Key& key) const override { - //Pair* kv = lookupKey(key); - //if (kv) - // return &(kv->value); - //else + Pair* kv = lookupKey(key); + if (kv) + return &(kv->value); + else return NULL; } virtual void put(const Key& key, const Value& value) override { diff --git a/YASI_12/main.cpp b/YASI_12/main.cpp index 18dae64..2f585eb 100644 --- a/YASI_12/main.cpp +++ b/YASI_12/main.cpp @@ -1,4 +1,5 @@ #include "common.h" +#include "utils.h" #include "ds.singlylinkedlist.h" #include "ds.doublylinkedlist.h" #include "ds.arraybinarytree.h" @@ -11,6 +12,7 @@ #include "ds.intlinearprobinghashtable.h" #include "ds.linearprobinghashtable.h" #include "ds.hopscotchhashtable.h" + //#include "Sorter.h" #include @@ -20,7 +22,6 @@ #include #include using namespace yasi; -using namespace yasi::ds; using namespace std; void testSort(); diff --git a/YASI_12/utils.h b/YASI_12/utils.h index a43d40f..b52d71f 100644 --- a/YASI_12/utils.h +++ b/YASI_12/utils.h @@ -67,6 +67,22 @@ string strPurge(const string str, const char* removeChars){ return trimmed; } +// given a power-of-two input n=2^k, return k +// if n is not a non-negative power of 2, return -1 +int whichPowerOfTwo(unsigned int n){ + if (n == 1) return 0; + if (n <= 0) return -1; + if (IS_POW_OF_TWO(n)){ + unsigned int k = 0; + for (; n != 1; n >>= 1) + k++; + return k; + } + else{ + return -1; + } +} + ///////////// enable in compile-time a certain code block depending on boolean predicate ///////////// ///////////// also, check if a certain class contains a certain function /////////////////////////////// //////////////////////// see http://stackoverflow.com/questions/257288/is-it-possible-to-write-a-c-template-to-check-for-a-functions-existence#264088 @@ -112,10 +128,21 @@ typename enable_if