From e4ecd4d4f75cd01133793ad4d552a7264d6156a6 Mon Sep 17 00:00:00 2001 From: ChunjiangZhu Date: Fri, 25 Oct 2019 12:14:58 -0400 Subject: [PATCH] sw add rangequery --- .DS_Store | Bin 14340 -> 14340 bytes similarity_search/.DS_Store | Bin 10244 -> 10244 bytes .../src/method/small_world_rand.cc | 113 +++++++++++++++++- 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 92d8ceb4515dd42a1ef8579e5fc54e76fae76a1a..ea845503291b874932e86828fdbd1bf79482f425 100644 GIT binary patch delta 1153 zcmeH`O-K}B7{{N#mAy0QI;-ybDy}Q8i>s~X7h3tzu0jP`3TbMRwK@|`y1VACsaY(9 zL?2Rz!GwySE)iZz>(HSfg6LFH9;!=p=n~bTi&B@~*%3TMw~ozWV4mOe&inBDzwgZG z%;>%64jbQg)~Q=+v6**u5S!3r`s|VIaAIy zI&yiL<^qY69FfF?Af746uI4s_xG4&w+0F@#~nF^)-0 z;Vdp;7MF1aH*p8|v4|x+z+*fy0wr$4!*|tj}VvOIDyfWKxordIe2cW91csZ4jYZ zthz>aDVsz-*2?;AvaDX_1}^%OB_?eHMm) z&;j#|XlRU_vOKM$T!R l!8x4A#iZ>y%;OraDPFVrEAQQ-Psf5ri#Jz6oIXkU(n%Ob}5)sfuFhuF1xJ(QQczQ8Z%I z1VytZUX4f5#Kh>qi#PRXVhOr7vjvLn+PRkk*UJC@$fi zI>Dw4Ycvm0DMuQc7@In;$5d`edD7O-#IZY(sZ{)V2hhaU_XsfmOZ+nm3zQiu~a_p7R=Qc4w{m*ke> z>&PD@LW!HS5g$1~W{5@-1tep(cy@(9kdNdO`9i*t@8k#h34mccir_*G>fuErn$U$_ z^kXjuFpM#b;}9kg#97QC3>8rCteaZGd11cV!}=sE623KG9bkjh63@5gt9}-coCR+sPX26g zcU~dMkX3SvtdR#~oxC8g$vd)Px%^DNnj7`XP@m;2U>O_1C=TNYCNYKMnBDZIA%-~S xkwgj?a1obq8CS51o4AcTFmdSluxub+_ErETP4+ z?+9tCQ(;nFI+UapL>g2?bg9UT+NDE>2s*0NrFUod(5Z8WE(7y_-jA8*8EYPEo~kuj zl)Bw}98$La%(`8_Vshzb+m?idxOir@CD;=kNoiSm1;wt4YJJtz8r!1zyy~z%mzo@6 z7Ax{aVLo}CoRXfQC`@2)g~)4mjv_Iz$AW0kO*x##L|I`GXKR=z%1X9M3Ntfr^q|r* zNn$1z5FZ7!#mQ%9Ed=N!Jf9^~Pi6l@eWs{4l$W1-ur%}306Es7!G)Et4 zk-pI~Vqn2q*dRkeE(%bBO6)`(>d}Bk>_;08q8%MjaR?d?qZdI8;TVQ7iqklQvp9!w zT*Wn9#|=zi5_fPH_wc}mDLle7W-yBvc!^hdJv|ZI>9EL#(uO~j7Te>J)~9FX6_%7% z*35_HN_NSiH<^=lugwv%F>5>*DYp%FX-5aO;Bl78eHqJ~a+=6hc6dEOwbAd@dNkE* z5ZNhj5QO&dykh9(BB3n00#R8Y!(&yI()U<0`p2oVzo8d4neqY&xR)`2FGE zsCCM+{?M5l5*hxQ=N)~bCHg|&X_;2&C;b9J$#T&fG0v54^&-jWTSi$P_h;qvO NKh(b;N*_@!{05?gA{_t# delta 1357 zcmeIwOH30%7zgnA^V|(9OnFYB6tIGT7@#5sFi@o`F9o3l0t9KRnt1R+j0YkZ<3SHzIO@Sfi3go_iB~-v_pr0`|7P=T_V-P+Otehf zqhd{a>grP)q~Oiul;FC|0c)q)?{T-ON0bAUj%Z4&GUW8Bql(?>Q#1jRu0*wrBO=VKKKxL6e>(vUWN~D@dqx5REWD{tkNMrPBt+Y!t zL@WkGN(75-e367Mi!DFA5{tfsk`nzp%spPVO`$t0O2^W-skMV81X z@|pYqK@8%M1cn?GpcoaXfep2=V>gR~_OO+M3PaRMdZ?Gqt1srbYu)=?iC@v>bLjp6h41rpN&JmY-Xhijmhf~SS zvPBNlBXi}w9y#>7te-ct4T8`e%4@v}zsuoHc^jGJ(D;yWGBKQNDv~6}ARhqEq$ee% z5^L`oa=MfuE-8dhZ7r8r$Ef1fRF9hj>S1M-#5(&u!^8Z;wb1YgdTSfo85*75Q1d3I z=4(EvxYzpI&5Q--&G~`9G<`{4lXv6;S>di<$v5(ytRe!Dh~~lx+}DT#+|lT(}k8T-Swu$Q)P02u5)j$JbcTA@>5VV-mM;8xQahk1&lFSil=BVHt1n5i9tD T)fs=*iRk~j|EO-zpLO~dL~KvW diff --git a/similarity_search/src/method/small_world_rand.cc b/similarity_search/src/method/small_world_rand.cc index e548d73..63d14a8 100644 --- a/similarity_search/src/method/small_world_rand.cc +++ b/similarity_search/src/method/small_world_rand.cc @@ -600,7 +600,9 @@ void SmallWorldRand::addCriticalSection(MSWNode *newElement){ template void SmallWorldRand::Search(RangeQuery* query, IdType) const { - throw runtime_error("Range search is not supported!"); +// throw runtime_error("Range search is not supported!"); + if (searchAlgoType_ == kV1Merge) SearchV1Merge(query); +// else SearchOld(query); } template @@ -718,6 +720,115 @@ void SmallWorldRand::SearchV1Merge(KNNQuery* query) const { } +template +void SmallWorldRand::SearchV1Merge(RangeQuery* query) const { + if (ElList_.empty()) return; + CHECK_MSG(efSearch_ > 0, "efSearch should be > 0"); +/* + * The trick of using large dense bitsets instead of unordered_set was + * borrowed from Wei Dong's kgraph: https://github.com/aaalgo/kgraph + * + * This trick works really well even in a multi-threaded mode. Indeed, the amount + * of allocated memory is small. For example, if one has 8M entries, the size of + * the bitmap is merely 1 MB. Furthermore, setting 1MB of entries to zero via memset would take only + * a fraction of millisecond. + */ + vector visitedBitset(NextNodeId_); + + /** + * Search of most k-closest elements to the query. + */ + MSWNode* currNode = pEntryPoint_; + CHECK_MSG(currNode != nullptr, "Bug: there is not entry point set!") + + SortArrBI sortedArr(efSearch_); // max(efSearch_, query->GetK()) + + const Object* currObj = currNode->getData(); + dist_t d = query->DistanceObjLeft(currObj); + sortedArr.push_unsorted_grow(d, currNode); // It won't grow + + IdType nodeId = currNode->getId(); + CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") > NextNodeId_ (" +ConvertToString(NextNodeId_) +")"); + + visitedBitset[nodeId] = true; + + uint_fast32_t currElem = 0; + + typedef typename SortArrBI::Item QueueItem; + + vector& queueData = sortedArr.get_data(); + vector itemBuff(8*NN_); + + // efSearch_ is always <= # of elements in the queueData.size() (the size of the BUFFER), but it can be + // larger than sortedArr.size(), which returns the number of actual elements in the buffer + while(currElem < min(sortedArr.size(),efSearch_)){ + auto& e = queueData[currElem]; + CHECK(!e.used); + e.used = true; + currNode = e.data; + ++currElem; + + for (MSWNode* neighbor : currNode->getAllFriends()) { + _mm_prefetch(reinterpret_cast(const_cast(neighbor->getData())), _MM_HINT_T0); + } + for (MSWNode* neighbor : currNode->getAllFriends()) { + _mm_prefetch(const_cast(neighbor->getData()->data()), _MM_HINT_T0); + } + + if (currNode->getAllFriends().size() > itemBuff.size()) + itemBuff.resize(currNode->getAllFriends().size()); + + size_t itemQty = 0; + + dist_t topKey = sortedArr.top_key(); + //calculate distance to each neighbor + for (MSWNode* neighbor : currNode->getAllFriends()) { + nodeId = neighbor->getId(); + CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") > NextNodeId_ (" +ConvertToString(NextNodeId_)); + + if (!visitedBitset[nodeId]) { + currObj = neighbor->getData(); + d = query->DistanceObjLeft(currObj); + visitedBitset[nodeId] = true; + if (sortedArr.size() < efSearch_ || d < topKey) { + itemBuff[itemQty++]=QueueItem(d, neighbor); + } + } + } + + if (itemQty) { + _mm_prefetch(const_cast(reinterpret_cast(&itemBuff[0])), _MM_HINT_T0); + std::sort(itemBuff.begin(), itemBuff.begin() + itemQty); + + size_t insIndex=0; + if (itemQty > MERGE_BUFFER_ALGO_SWITCH_THRESHOLD) { + insIndex = sortedArr.merge_with_sorted_items(&itemBuff[0], itemQty); + + if (insIndex < currElem) { + currElem = insIndex; + } + } else { + for (size_t ii = 0; ii < itemQty; ++ii) { + size_t insIndex = sortedArr.push_or_replace_non_empty_exp(itemBuff[ii].key, itemBuff[ii].data); + + if (insIndex < currElem) { + currElem = insIndex; + } + } + } + } + + // To ensure that we either reach the end of the unexplored queue or currElem points to the first unused element + while (currElem < sortedArr.size() && queueData[currElem].used == true) + ++currElem; + } + + for (uint_fast32_t i = 0; i < sortedArr.size(); ++i) { // i < query->GetK() && + query->CheckAndAddToResult(queueData[i].key, queueData[i].data->getData()); + } +} + + template void SmallWorldRand::SearchOld(KNNQuery* query) const {