Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #357 from BorisLestsov/master
Bugfixes for small_world_rand batch addition/deletion and index loading
  • Loading branch information
Leonid Boytsov authored and GitHub committed Dec 7, 2018
2 parents 1eda05d + ac07969 commit 193435a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -17,7 +17,7 @@ NMSLIB is an **extendible library**, which means that is possible to add new sea

Other contributors: Lawrence Cayton, Wei Dong, Avrelin Nikita, Dmitry Yashunin, Bob Poekert, @orgoro, Maxim Andreev, Daniel Lemire, Nathan Kurz, Alexander Ponomarenko.

**Citing:** If you find this library useful, feel free to cite our SISAP paper [**[BibTex]**](http://dblp.uni-trier.de/rec/bibtex/conf/sisap/BoytsovN13) as well as other papers listed in the end. One crucial contribution to cite is the fast Hierarchical Navigable World graph (HNSW) method [**[BibTex]**](https://dblp.uni-trier.de/rec/bibtex/journals/corr/MalkovY16).
**Citing:** If you find this library useful, feel free to cite our SISAP paper [**[BibTex]**](http://dblp.uni-trier.de/rec/bibtex/conf/sisap/BoytsovN13) as well as other papers listed at the end. One crucial contribution to cite is the fast Hierarchical Navigable Small World graph (HNSW) method [**[BibTex]**](https://dblp.uni-trier.de/rec/bibtex/journals/corr/MalkovY16). Please [also check out the stand-alone HNSW implementation by Yury Malkov](https://github.com/nmslib/hnswlib).

Leo(nid) Boytsov is a maintainer. Leo was supported by the [Open Advancement of Question Answering Systems (OAQA) group](https://github.com/oaqa) and the following NSF grant #1618159: "[Matching and Ranking via Proximity Graphs: Applications to Question Answering and Beyond](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1618159&HistoricalAwards=false)". Bileg was supported by the [iAd Center](https://web.archive.org/web/20160306011711/http://www.iad-center.com/).

Expand Down
5 changes: 5 additions & 0 deletions similarity_search/include/method/small_world_rand.h
Expand Up @@ -274,6 +274,11 @@ public:
void SetQueryTimeParams(const AnyParams& ) override;

enum PatchingStrategy { kNone = 0, kNeighborsOnly = 1 };

// Initializes, from IndexParams, the parameters that are usually initialized
// in CreateIndex. This method should be called before LoadIndex.
void InitParamsManually(const AnyParams& IndexParams);

private:

size_t NN_;
Expand Down
75 changes: 60 additions & 15 deletions similarity_search/src/method/small_world_rand.cc
Expand Up @@ -151,7 +151,21 @@ void SmallWorldRand<dist_t>::AddBatch(const ObjectVector& batchData,
<< " futureNextNodeId + 1 after batch addition: " << futureNextNodeId;

// 2) One entry should be added before all the threads are started, or else add() will not work properly
addCriticalSection(new MSWNode(batchData[0], NextNodeId_));


bool isEmpty = false;

{
unique_lock<mutex> lock(ElListGuard_);
isEmpty = ElList_.empty();
}
int start_add=0;

if (isEmpty){
addCriticalSection(new MSWNode(batchData[0], NextNodeId_));
start_add = 1;
}


unique_ptr<ProgressDisplay> progress_bar(bPrintProgress ?
new ProgressDisplay(batchData.size(), cerr)
Expand All @@ -160,7 +174,7 @@ void SmallWorldRand<dist_t>::AddBatch(const ObjectVector& batchData,
if (indexThreadQty_ <= 1) {
// Skip the first element, one element is already added
if (progress_bar) ++(*progress_bar);
for (size_t id = 1; id < batchData.size(); ++id) {
for (size_t id = start_add; id < batchData.size(); ++id) {
MSWNode* node = new MSWNode(batchData[id], id + NextNodeId_);
add(node, futureNextNodeId);
if (progress_bar) ++(*progress_bar);
Expand Down Expand Up @@ -250,21 +264,32 @@ void SmallWorldRand<dist_t>::DeleteBatch(const vector<IdType>& batchData, int de
mutex mtx;
vector<thread> threads;

for (size_t i = 0; i < indexThreadQty_; ++i) {
threads.push_back(thread(
[&]() {
MSWNode* node = nullptr;
vector<MSWNode*> cacheDelNode;
while(GetNextQueueObj(mtx, toPatchQueue, node)) {
if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset);
else node->removeGivenFriendsPatchWithClosestNeighbor<dist_t>(space_, use_proxy_dist_,
delNodesBitset, cacheDelNode);
if (indexThreadQty_ <= 1) {
LOG(LIB_INFO) << "Single threaded batch delete: " << vToPatchNodes.size();
MSWNode* node = nullptr;
vector<MSWNode*> cacheDelNode;
while(GetNextQueueObj(mtx, toPatchQueue, node)) {
if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset);
else node->removeGivenFriendsPatchWithClosestNeighbor<dist_t>(space_, use_proxy_dist_,
delNodesBitset, cacheDelNode);
}

} else {
for (size_t i = 0; i < indexThreadQty_; ++i) {
threads.push_back(thread(
[&]() {
MSWNode* node = nullptr;
vector<MSWNode*> cacheDelNode;
while(GetNextQueueObj(mtx, toPatchQueue, node)) {
if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset);
else node->removeGivenFriendsPatchWithClosestNeighbor<dist_t>(space_, use_proxy_dist_,
delNodesBitset, cacheDelNode);
}
}
}
));
));
}
for (auto& thread : threads) thread.join();
}
for (auto& thread : threads) thread.join();


if (checkIDs) {
for (auto it : ElList_) {
Expand Down Expand Up @@ -337,6 +362,26 @@ void SmallWorldRand<dist_t>::CheckIDs() const
}
}

/*
 * Reads the index-time parameters from IndexParams into the corresponding
 * members without building an index:
 *   "NN"             -> NN_              (default: 10)
 *   "efConstruction" -> efConstruction_  (default: the value of NN_)
 *   "indexThreadQty" -> indexThreadQty_  (default: thread::hardware_concurrency())
 *   "useProxyDist"   -> use_proxy_dist_  (default: false)
 * efSearch_ is set to NN_. The resulting values are logged, and finally
 * CheckUnused() is invoked on the parameter manager.
 */
template <typename dist_t>
void SmallWorldRand<dist_t>::InitParamsManually(const AnyParams& IndexParams)
{
  AnyParamManager paramManager(IndexParams);

  // NN must be read first: it is the default for efConstruction and
  // the value assigned to efSearch_.
  paramManager.GetParamOptional("NN", NN_, 10);
  paramManager.GetParamOptional("efConstruction", efConstruction_, NN_);
  efSearch_ = NN_;

  const unsigned defaultThreadQty = thread::hardware_concurrency();
  paramManager.GetParamOptional("indexThreadQty", indexThreadQty_, defaultThreadQty);
  paramManager.GetParamOptional("useProxyDist", use_proxy_dist_, false);

  // Report the effective settings.
  LOG(LIB_INFO) << "NN = " << NN_;
  LOG(LIB_INFO) << "efConstruction_ = " << efConstruction_;
  LOG(LIB_INFO) << "indexThreadQty = " << indexThreadQty_;
  LOG(LIB_INFO) << "useProxyDist = " << use_proxy_dist_;

  paramManager.CheckUnused();
}


template <typename dist_t>
void SmallWorldRand<dist_t>::CreateIndex(const AnyParams& IndexParams)
{
Expand Down
6 changes: 4 additions & 2 deletions test_batch_app/test_batch_mod.cc
Expand Up @@ -194,10 +194,12 @@ void doWork(int argc, char* argv[]) {

CHECK_MSG(knnK > 0, "k-NN k should be > 0!");

int seed = 0;

if (LogFile != "")
initLibrary(LIB_LOGFILE, LogFile.c_str());
initLibrary(seed, LIB_LOGFILE, LogFile.c_str());
else
initLibrary(LIB_LOGSTDERR, NULL); // Use STDERR for logging
initLibrary(seed, LIB_LOGSTDERR, NULL); // Use STDERR for logging

unique_ptr<Space<float>> space(SpaceFactoryRegistry<float>::Instance().CreateSpace(SpaceType, *SpaceParams));

Expand Down

0 comments on commit 193435a

Please sign in to comment.