diff --git a/.DS_Store b/.DS_Store
index 92d8ceb..ea84550 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/similarity_search/.DS_Store b/similarity_search/.DS_Store
index 5362351..1b0ef4c 100644
Binary files a/similarity_search/.DS_Store and b/similarity_search/.DS_Store differ
diff --git a/similarity_search/src/method/small_world_rand.cc b/similarity_search/src/method/small_world_rand.cc
index e548d73..63d14a8 100644
--- a/similarity_search/src/method/small_world_rand.cc
+++ b/similarity_search/src/method/small_world_rand.cc
@@ -600,7 +600,13 @@ void SmallWorldRand<dist_t>::addCriticalSection(MSWNode *newElement){
 
 template <typename dist_t>
 void SmallWorldRand<dist_t>::Search(RangeQuery<dist_t>* query, IdType) const {
-  throw runtime_error("Range search is not supported!");
+  // Range search is implemented only for the merge-based search (kV1Merge).
+  // Any other algorithm type fails loudly: returning without touching the query
+  // would be indistinguishable from a valid, empty answer.
+  if (searchAlgoType_ != kV1Merge) {
+    throw runtime_error("Range search is not supported by the selected search algorithm!");
+  }
+  SearchV1Merge(query);
 }
 
 template <typename dist_t>
@@ -718,6 +724,133 @@ void SmallWorldRand<dist_t>::SearchV1Merge(KNNQuery<dist_t>* query) const {
 }
 
 
+/**
+ * Range-query variant of the merge-based graph search.
+ *
+ * Starting from pEntryPoint_, it repeatedly takes the first not-yet-used entry
+ * of a sorted candidate array (created with efSearch_ slots), computes the
+ * distance from the query to each unvisited neighbor of that entry and merges
+ * the promising neighbors back into the array. Once no unused entry is left
+ * among the first efSearch_ ones, every entry of the array is handed to
+ * query->CheckAndAddToResult(), which decides what ends up in the result.
+ *
+ * NOTE(review): because the candidate array is created with efSearch_ slots,
+ * it looks like at most efSearch_ objects can be reported even when more
+ * objects satisfy the range query -- confirm that this cap is intended.
+ *
+ * @param query range query; supplies the distance and collects the results.
+ */
+template <typename dist_t>
+void SmallWorldRand<dist_t>::SearchV1Merge(RangeQuery<dist_t>* query) const {
+  if (ElList_.empty()) return;
+  CHECK_MSG(efSearch_ > 0, "efSearch should be > 0");
+/*
+ * The trick of using large dense bitsets instead of unordered_set was
+ * borrowed from Wei Dong's kgraph: https://github.com/aaalgo/kgraph
+ *
+ * This trick works really well even in a multi-threaded mode. Indeed, the amount
+ * of allocated memory is small. For example, if one has 8M entries, the size of
+ * the bitmap is merely 1 MB. Furthermore, setting 1MB of entries to zero via memset would take only
+ * a fraction of millisecond.
+ */
+  vector<bool> visitedBitset(NextNodeId_);
+
+  // The traversal starts from the single entry point of the graph.
+  MSWNode* currNode = pEntryPoint_;
+  CHECK_MSG(currNode != nullptr, "Bug: there is not entry point set!");
+
+  SortArrBI<dist_t,MSWNode*> sortedArr(efSearch_);
+
+  const Object* currObj = currNode->getData();
+  dist_t d = query->DistanceObjLeft(currObj);
+  sortedArr.push_unsorted_grow(d, currNode); // It won't grow
+
+  IdType nodeId = currNode->getId();
+  CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") >= NextNodeId_ (" + ConvertToString(NextNodeId_) + ")");
+
+  visitedBitset[nodeId] = true;
+
+  uint_fast32_t currElem = 0;
+
+  typedef typename SortArrBI<dist_t,MSWNode*>::Item QueueItem;
+
+  vector<QueueItem>& queueData = sortedArr.get_data();
+  vector<QueueItem>  itemBuff(8*NN_);
+
+  // efSearch_ is always <= # of elements in the queueData.size() (the size of the BUFFER), but it can be
+  // larger than sortedArr.size(), which returns the number of actual elements in the buffer
+  while (currElem < min(sortedArr.size(), efSearch_)) {
+    auto& e = queueData[currElem];
+    CHECK(!e.used);
+    e.used = true;
+    currNode = e.data;
+    ++currElem;
+
+    // Fetch the adjacency list once instead of once per loop below.
+    const auto& neighbors = currNode->getAllFriends();
+
+    // Two prefetch passes: first the objects returned by getData(), then the buffers returned by their data().
+    for (MSWNode* neighbor : neighbors) {
+      _mm_prefetch(reinterpret_cast<const char*>(neighbor->getData()), _MM_HINT_T0);
+    }
+    for (MSWNode* neighbor : neighbors) {
+      _mm_prefetch(reinterpret_cast<const char*>(neighbor->getData()->data()), _MM_HINT_T0);
+    }
+
+    if (neighbors.size() > itemBuff.size())
+      itemBuff.resize(neighbors.size());
+
+    size_t itemQty = 0;
+
+    // Read once per expanded node: sortedArr is not modified until the merge step below.
+    const dist_t topKey = sortedArr.top_key();
+    // Calculate the distance to each neighbor that has not been visited yet.
+    for (MSWNode* neighbor : neighbors) {
+      nodeId = neighbor->getId();
+      CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") >= NextNodeId_ (" + ConvertToString(NextNodeId_) + ")");
+
+      if (!visitedBitset[nodeId]) {
+        currObj = neighbor->getData();
+        d = query->DistanceObjLeft(currObj);
+        visitedBitset[nodeId] = true;
+        if (sortedArr.size() < efSearch_ || d < topKey) {
+          itemBuff[itemQty++] = QueueItem(d, neighbor);
+        }
+      }
+    }
+
+    if (itemQty) {
+      _mm_prefetch(reinterpret_cast<const char*>(&itemBuff[0]), _MM_HINT_T0);
+      std::sort(itemBuff.begin(), itemBuff.begin() + itemQty);
+
+      if (itemQty > MERGE_BUFFER_ALGO_SWITCH_THRESHOLD) {
+        const size_t insIndex = sortedArr.merge_with_sorted_items(&itemBuff[0], itemQty);
+
+        if (insIndex < currElem) {
+          currElem = insIndex;
+        }
+      } else {
+        for (size_t ii = 0; ii < itemQty; ++ii) {
+          const size_t insIndex = sortedArr.push_or_replace_non_empty_exp(itemBuff[ii].key, itemBuff[ii].data);
+
+          if (insIndex < currElem) {
+            currElem = insIndex;
+          }
+        }
+      }
+    }
+
+    // To ensure that we either reach the end of the unexplored queue or currElem points to the first unused element
+    while (currElem < sortedArr.size() && queueData[currElem].used == true)
+      ++currElem;
+  }
+
+  for (size_t i = 0; i < sortedArr.size(); ++i) {
+    query->CheckAndAddToResult(queueData[i].key, queueData[i].data->getData());
+  }
+}
+
+
 
 template <typename dist_t>
 void SmallWorldRand<dist_t>::SearchOld(KNNQuery<dist_t>* query) const {