From c2c46d538ec7493ff9c90264d015ee0e70fb1a4c Mon Sep 17 00:00:00 2001 From: Leonid Boytsov Date: Wed, 7 Nov 2018 22:19:07 -0500 Subject: [PATCH 1/2] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 04ec3b6..05c39db 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ NMSLIB is an **extendible library**, which means that is possible to add new sea Other contributors: Lawrence Cayton, Wei Dong, Avrelin Nikita, Dmitry Yashunin, Bob Poekert, @orgoro, Maxim Andreev, Daniel Lemire, Nathan Kurz, Alexander Ponomarenko. -**Citing:** If you find this library useful, feel free to cite our SISAP paper [**[BibTex]**](http://dblp.uni-trier.de/rec/bibtex/conf/sisap/BoytsovN13) as well as other papers listed in the end. One crucial contribution to cite is the fast Hierarchical Navigable World graph (HNSW) method [**[BibTex]**](https://dblp.uni-trier.de/rec/bibtex/journals/corr/MalkovY16). +**Citing:** If you find this library useful, feel free to cite our SISAP paper [**[BibTex]**](http://dblp.uni-trier.de/rec/bibtex/conf/sisap/BoytsovN13) as well as other papers listed at the end. One crucial contribution to cite is the fast Hierarchical Navigable Small World graph (HNSW) method [**[BibTex]**](https://dblp.uni-trier.de/rec/bibtex/journals/corr/MalkovY16). Please also [check out the stand-alone HNSW implementation by Yury Malkov](https://github.com/nmslib/hnswlib). Leo(nid) Boytsov is a maintainer. Leo was supported by the [Open Advancement of Question Answering Systems (OAQA) group](https://github.com/oaqa) and the following NSF grant #1618159: "[Matching and Ranking via Proximity Graphs: Applications to Question Answering and Beyond](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1618159&HistoricalAwards=false)". Bileg was supported by the [iAd Center](https://web.archive.org/web/20160306011711/http://www.iad-center.com/). 
From ac07969354e08439fc1a1829c30602ef798c8c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D1=81=D1=86=D0=BE=D0=B2=20=D0=91=D0=BE=D1=80?= =?UTF-8?q?=D0=B8=D1=81?= Date: Fri, 16 Nov 2018 20:50:26 +0300 Subject: [PATCH 2/2] Bugfixes for index loading and AddBatch, optional single threaded DeleteBatch --- .../include/method/small_world_rand.h | 5 ++ .../src/method/small_world_rand.cc | 75 +++++++++++++++---- test_batch_app/test_batch_mod.cc | 6 +- 3 files changed, 69 insertions(+), 17 deletions(-) diff --git a/similarity_search/include/method/small_world_rand.h b/similarity_search/include/method/small_world_rand.h index 5275e48..987f6db 100644 --- a/similarity_search/include/method/small_world_rand.h +++ b/similarity_search/include/method/small_world_rand.h @@ -274,6 +274,11 @@ class SmallWorldRand : public Index { void SetQueryTimeParams(const AnyParams& ) override; enum PatchingStrategy { kNone = 0, kNeighborsOnly = 1 }; + + // Call this method before LoadIndex to initialize the parameters + // that are usually initialized in CreateIndex. + void InitParamsManually(const AnyParams& IndexParams); + private: size_t NN_; diff --git a/similarity_search/src/method/small_world_rand.cc b/similarity_search/src/method/small_world_rand.cc index e8280cb..e548d73 100644 --- a/similarity_search/src/method/small_world_rand.cc +++ b/similarity_search/src/method/small_world_rand.cc @@ -151,7 +151,21 @@ void SmallWorldRand::AddBatch(const ObjectVector& batchData, << " futureNextNodeId + 1 after batch addition: " << futureNextNodeId; // 2) One entry should be added before all the threads are started, or else add() will not work properly - addCriticalSection(new MSWNode(batchData[0], NextNodeId_)); + + + bool isEmpty = false; + + { + unique_lock lock(ElListGuard_); + isEmpty = ElList_.empty(); + } + int start_add=0; + + if (isEmpty){ + addCriticalSection(new MSWNode(batchData[0], NextNodeId_)); + start_add = 1; + } + unique_ptr progress_bar(bPrintProgress ? 
new ProgressDisplay(batchData.size(), cerr) @@ -160,7 +174,7 @@ void SmallWorldRand::AddBatch(const ObjectVector& batchData, if (indexThreadQty_ <= 1) { // Skip the first element, one element is already added if (progress_bar) ++(*progress_bar); - for (size_t id = 1; id < batchData.size(); ++id) { + for (size_t id = start_add; id < batchData.size(); ++id) { MSWNode* node = new MSWNode(batchData[id], id + NextNodeId_); add(node, futureNextNodeId); if (progress_bar) ++(*progress_bar); @@ -250,21 +264,32 @@ void SmallWorldRand::DeleteBatch(const vector& batchData, int de mutex mtx; vector threads; - for (size_t i = 0; i < indexThreadQty_; ++i) { - threads.push_back(thread( - [&]() { - MSWNode* node = nullptr; - vector cacheDelNode; - while(GetNextQueueObj(mtx, toPatchQueue, node)) { - if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); - else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, - delNodesBitset, cacheDelNode); + if (indexThreadQty_ <= 1) { + LOG(LIB_INFO) << "Single threaded batch delete: " << vToPatchNodes.size(); + MSWNode* node = nullptr; + vector cacheDelNode; + while(GetNextQueueObj(mtx, toPatchQueue, node)) { + if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); + else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, + delNodesBitset, cacheDelNode); + } + + } else { + for (size_t i = 0; i < indexThreadQty_; ++i) { + threads.push_back(thread( + [&]() { + MSWNode* node = nullptr; + vector cacheDelNode; + while(GetNextQueueObj(mtx, toPatchQueue, node)) { + if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); + else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, + delNodesBitset, cacheDelNode); + } } - } - )); + )); + } + for (auto& thread : threads) thread.join(); } - for (auto& thread : threads) thread.join(); - if (checkIDs) { for (auto it : ElList_) { @@ -337,6 +362,26 @@ void SmallWorldRand::CheckIDs() const } } +template +void 
SmallWorldRand::InitParamsManually(const AnyParams& IndexParams) +{ + AnyParamManager pmgr(IndexParams); + + pmgr.GetParamOptional("NN", NN_, 10); + pmgr.GetParamOptional("efConstruction", efConstruction_, NN_); + efSearch_ = NN_; + pmgr.GetParamOptional("indexThreadQty", indexThreadQty_, thread::hardware_concurrency()); + pmgr.GetParamOptional("useProxyDist", use_proxy_dist_, false); + + LOG(LIB_INFO) << "NN = " << NN_; + LOG(LIB_INFO) << "efConstruction_ = " << efConstruction_; + LOG(LIB_INFO) << "indexThreadQty = " << indexThreadQty_; + LOG(LIB_INFO) << "useProxyDist = " << use_proxy_dist_; + + pmgr.CheckUnused(); +} + + template void SmallWorldRand::CreateIndex(const AnyParams& IndexParams) { diff --git a/test_batch_app/test_batch_mod.cc b/test_batch_app/test_batch_mod.cc index 3a8ed0e..72ed095 100644 --- a/test_batch_app/test_batch_mod.cc +++ b/test_batch_app/test_batch_mod.cc @@ -194,10 +194,12 @@ void doWork(int argc, char* argv[]) { CHECK_MSG(knnK > 0, "k-NN k should be > 0!"); + int seed = 0; + if (LogFile != "") - initLibrary(LIB_LOGFILE, LogFile.c_str()); + initLibrary(seed, LIB_LOGFILE, LogFile.c_str()); else - initLibrary(LIB_LOGSTDERR, NULL); // Use STDERR for logging + initLibrary(seed, LIB_LOGSTDERR, NULL); // Use STDERR for logging unique_ptr> space(SpaceFactoryRegistry::Instance().CreateSpace(SpaceType, *SpaceParams));