Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
sw add rangequery
  • Loading branch information
ChunjiangZhu committed Oct 25, 2019
1 parent c2d007a commit e4ecd4d
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 1 deletion.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified similarity_search/.DS_Store
Binary file not shown.
113 changes: 112 additions & 1 deletion similarity_search/src/method/small_world_rand.cc
Expand Up @@ -600,7 +600,9 @@ void SmallWorldRand<dist_t>::addCriticalSection(MSWNode *newElement){

/**
 * Range-search entry point: dispatches to the implementation selected by searchAlgoType_.
 *
 * Of the search algorithms, only kV1Merge is routed to a range-query implementation here
 * (SearchV1Merge). Any other setting is rejected explicitly rather than silently returning
 * an empty result set.
 *
 * @param query  range query to execute; it is handed to the selected implementation unchanged.
 *               The second (unnamed) IdType parameter is not used.
 * @throws runtime_error if the configured search algorithm has no range-search implementation.
 */
template <typename dist_t>
void SmallWorldRand<dist_t>::Search(RangeQuery<dist_t>* query, IdType) const {
  if (searchAlgoType_ == kV1Merge) {
    SearchV1Merge(query);
    return;
  }
  // No range-query counterpart exists for the remaining algorithm(s): fail loudly so the
  // caller does not mistake "not implemented" for "nothing within the radius".
  throw runtime_error("Range search is not supported!");
}

template <typename dist_t>
Expand Down Expand Up @@ -718,6 +720,115 @@ void SmallWorldRand<dist_t>::SearchV1Merge(KNNQuery<dist_t>* query) const {
}


/**
 * Approximate range search over the small-world graph (the "V1Merge" variant).
 *
 * Starting from the entry point, the routine repeatedly takes the first not-yet-expanded
 * candidate from a sorted candidate array, computes the distance from the query to each of
 * its unvisited neighbors, and inserts the promising neighbors back into the array. The
 * traversal stops when no unexpanded candidate remains among the first efSearch_ entries.
 * Every candidate left in the array is then offered to query->CheckAndAddToResult().
 *
 * NOTE(review): the candidate array is created with capacity efSearch_, so the number of
 * objects offered to the query appears to be bounded by efSearch_ regardless of how many
 * objects actually lie within the query radius -- confirm this cap is intended.
 *
 * @param query  range query; used to compute distances (DistanceObjLeft) and to collect
 *               results (CheckAndAddToResult).
 */
template <typename dist_t>
void SmallWorldRand<dist_t>::SearchV1Merge(RangeQuery<dist_t>* query) const {
  if (ElList_.empty()) return;
  CHECK_MSG(efSearch_ > 0, "efSearch should be > 0");
  /*
   * The trick of using large dense bitsets instead of unordered_set was
   * borrowed from Wei Dong's kgraph: https://github.com/aaalgo/kgraph
   *
   * This trick works really well even in a multi-threaded mode. Indeed, the amount
   * of allocated memory is small. For example, if one has 8M entries, the size of
   * the bitmap is merely 1 MB. Furthermore, setting 1MB of entries to zero via memset
   * would take only a fraction of millisecond.
   */
  vector<bool> visitedBitset(NextNodeId_);

  /**
   * Graph traversal that collects up to efSearch_ candidates closest to the query;
   * the final filtering is delegated to the query object.
   */
  MSWNode* currNode = pEntryPoint_;
  CHECK_MSG(currNode != nullptr, "Bug: there is not entry point set!");

  SortArrBI<dist_t,MSWNode*> sortedArr(efSearch_);

  const Object* currObj = currNode->getData();
  dist_t d = query->DistanceObjLeft(currObj);
  sortedArr.push_unsorted_grow(d, currNode); // It won't grow

  IdType nodeId = currNode->getId();
  CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") >= NextNodeId_ (" + ConvertToString(NextNodeId_) + ")");

  visitedBitset[nodeId] = true;

  // Index of the first candidate in queueData that has not been expanded yet.
  uint_fast32_t currElem = 0;

  typedef typename SortArrBI<dist_t,MSWNode*>::Item QueueItem;

  vector<QueueItem>& queueData = sortedArr.get_data();
  // Scratch buffer for the neighbors of one node; enlarged on demand below.
  vector<QueueItem> itemBuff(8*NN_);

  // efSearch_ is always <= # of elements in the queueData.size() (the size of the BUFFER), but it can be
  // larger than sortedArr.size(), which returns the number of actual elements in the buffer
  while (currElem < min(sortedArr.size(), efSearch_)) {
    auto& e = queueData[currElem];
    CHECK(!e.used);
    e.used = true;
    currNode = e.data;
    ++currElem;

    const auto& friends = currNode->getAllFriends();

    // Two prefetch passes: first the Object headers, then the payload each of them points to.
    for (MSWNode* neighbor : friends) {
      _mm_prefetch(reinterpret_cast<const char*>(const_cast<const Object*>(neighbor->getData())), _MM_HINT_T0);
    }
    for (MSWNode* neighbor : friends) {
      _mm_prefetch(const_cast<const char*>(neighbor->getData()->data()), _MM_HINT_T0);
    }

    if (friends.size() > itemBuff.size())
      itemBuff.resize(friends.size());

    size_t itemQty = 0;

    // Sampled once per expansion, i.e. before any of this node's neighbors are inserted.
    const dist_t topKey = sortedArr.top_key();
    // Calculate the distance to each not-yet-visited neighbor
    for (MSWNode* neighbor : friends) {
      nodeId = neighbor->getId();
      CHECK_MSG(nodeId < NextNodeId_, "Bug: nodeId (" + ConvertToString(nodeId) + ") >= NextNodeId_ (" + ConvertToString(NextNodeId_) + ")");

      if (!visitedBitset[nodeId]) {
        currObj = neighbor->getData();
        d = query->DistanceObjLeft(currObj);
        visitedBitset[nodeId] = true;
        // Keep the neighbor if the candidate array still has room or it beats topKey.
        if (sortedArr.size() < efSearch_ || d < topKey) {
          itemBuff[itemQty++] = QueueItem(d, neighbor);
        }
      }
    }

    if (itemQty) {
      _mm_prefetch(const_cast<const char*>(reinterpret_cast<char*>(&itemBuff[0])), _MM_HINT_T0);
      std::sort(itemBuff.begin(), itemBuff.begin() + itemQty);

      // Large batches are merged in one pass; small ones are inserted one by one.
      // In both cases, if something landed before currElem, step back so that the
      // newly inserted (unexpanded) candidate is processed next.
      if (itemQty > MERGE_BUFFER_ALGO_SWITCH_THRESHOLD) {
        const size_t insIndex = sortedArr.merge_with_sorted_items(&itemBuff[0], itemQty);

        if (insIndex < currElem) {
          currElem = insIndex;
        }
      } else {
        for (size_t ii = 0; ii < itemQty; ++ii) {
          const size_t insIndex = sortedArr.push_or_replace_non_empty_exp(itemBuff[ii].key, itemBuff[ii].data);

          if (insIndex < currElem) {
            currElem = insIndex;
          }
        }
      }
    }

    // To ensure that we either reach the end of the unexplored queue or currElem points to the first unused element
    while (currElem < sortedArr.size() && queueData[currElem].used == true)
      ++currElem;
  }

  // Offer every collected candidate to the query, which decides whether to keep it.
  for (uint_fast32_t i = 0; i < sortedArr.size(); ++i) {
    query->CheckAndAddToResult(queueData[i].key, queueData[i].data->getData());
  }
}


template <typename dist_t>
void SmallWorldRand<dist_t>::SearchOld(KNNQuery<dist_t>* query) const {

Expand Down

0 comments on commit e4ecd4d

Please sign in to comment.