From ac07969354e08439fc1a1829c30602ef798c8c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D1=81=D1=86=D0=BE=D0=B2=20=D0=91=D0=BE=D1=80?= =?UTF-8?q?=D0=B8=D1=81?= Date: Fri, 16 Nov 2018 20:50:26 +0300 Subject: [PATCH] Bugfixes for index loading and AddBatch, optional single threaded DeleteBatch --- .../include/method/small_world_rand.h | 5 ++ .../src/method/small_world_rand.cc | 75 +++++++++++++++---- test_batch_app/test_batch_mod.cc | 6 +- 3 files changed, 69 insertions(+), 17 deletions(-) diff --git a/similarity_search/include/method/small_world_rand.h b/similarity_search/include/method/small_world_rand.h index 5275e48..987f6db 100644 --- a/similarity_search/include/method/small_world_rand.h +++ b/similarity_search/include/method/small_world_rand.h @@ -274,6 +274,11 @@ public: void SetQueryTimeParams(const AnyParams& ) override; enum PatchingStrategy { kNone = 0, kNeighborsOnly = 1 }; + + //This method should be called before LoadIndex to initialize the parameters + //that are usually initialized in CreateIndex + void InitParamsManually(const AnyParams& IndexParams); + private: size_t NN_; diff --git a/similarity_search/src/method/small_world_rand.cc b/similarity_search/src/method/small_world_rand.cc index e8280cb..e548d73 100644 --- a/similarity_search/src/method/small_world_rand.cc +++ b/similarity_search/src/method/small_world_rand.cc @@ -151,7 +151,21 @@ void SmallWorldRand::AddBatch(const ObjectVector& batchData, << " futureNextNodeId + 1 after batch addition: " << futureNextNodeId; // 2) One entry should be added before all the threads are started, or else add() will not work properly - addCriticalSection(new MSWNode(batchData[0], NextNodeId_)); + + + bool isEmpty = false; + + { + unique_lock lock(ElListGuard_); + isEmpty = ElList_.empty(); + } + int start_add=0; + + if (isEmpty){ + addCriticalSection(new MSWNode(batchData[0], NextNodeId_)); + start_add = 1; + } + unique_ptr progress_bar(bPrintProgress ?
new ProgressDisplay(batchData.size(), cerr) @@ -160,7 +174,7 @@ void SmallWorldRand::AddBatch(const ObjectVector& batchData, if (indexThreadQty_ <= 1) { // Skip the first element, one element is already added if (progress_bar) ++(*progress_bar); - for (size_t id = 1; id < batchData.size(); ++id) { + for (size_t id = start_add; id < batchData.size(); ++id) { MSWNode* node = new MSWNode(batchData[id], id + NextNodeId_); add(node, futureNextNodeId); if (progress_bar) ++(*progress_bar); @@ -250,21 +264,32 @@ void SmallWorldRand::DeleteBatch(const vector& batchData, int de mutex mtx; vector threads; - for (size_t i = 0; i < indexThreadQty_; ++i) { - threads.push_back(thread( - [&]() { - MSWNode* node = nullptr; - vector cacheDelNode; - while(GetNextQueueObj(mtx, toPatchQueue, node)) { - if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); - else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, - delNodesBitset, cacheDelNode); + if (indexThreadQty_ <= 1) { + LOG(LIB_INFO) << "Single threaded batch delete: " << vToPatchNodes.size(); + MSWNode* node = nullptr; + vector cacheDelNode; + while(GetNextQueueObj(mtx, toPatchQueue, node)) { + if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); + else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, + delNodesBitset, cacheDelNode); + } + + } else { + for (size_t i = 0; i < indexThreadQty_; ++i) { + threads.push_back(thread( + [&]() { + MSWNode* node = nullptr; + vector cacheDelNode; + while(GetNextQueueObj(mtx, toPatchQueue, node)) { + if (kNone == patchStrat) node->removeGivenFriends(delNodesBitset); + else node->removeGivenFriendsPatchWithClosestNeighbor(space_, use_proxy_dist_, + delNodesBitset, cacheDelNode); + } } - } - )); + )); + } + for (auto& thread : threads) thread.join(); } - for (auto& thread : threads) thread.join(); - if (checkIDs) { for (auto it : ElList_) { @@ -337,6 +362,26 @@ void SmallWorldRand::CheckIDs() const } } +template +void 
SmallWorldRand::InitParamsManually(const AnyParams& IndexParams) +{ + AnyParamManager pmgr(IndexParams); + + pmgr.GetParamOptional("NN", NN_, 10); + pmgr.GetParamOptional("efConstruction", efConstruction_, NN_); + efSearch_ = NN_; + pmgr.GetParamOptional("indexThreadQty", indexThreadQty_, thread::hardware_concurrency()); + pmgr.GetParamOptional("useProxyDist", use_proxy_dist_, false); + + LOG(LIB_INFO) << "NN = " << NN_; + LOG(LIB_INFO) << "efConstruction_ = " << efConstruction_; + LOG(LIB_INFO) << "indexThreadQty = " << indexThreadQty_; + LOG(LIB_INFO) << "useProxyDist = " << use_proxy_dist_; + + pmgr.CheckUnused(); +} + + template void SmallWorldRand::CreateIndex(const AnyParams& IndexParams) { diff --git a/test_batch_app/test_batch_mod.cc b/test_batch_app/test_batch_mod.cc index 3a8ed0e..72ed095 100644 --- a/test_batch_app/test_batch_mod.cc +++ b/test_batch_app/test_batch_mod.cc @@ -194,10 +194,12 @@ void doWork(int argc, char* argv[]) { CHECK_MSG(knnK > 0, "k-NN k should be > 0!"); + int seed = 0; + if (LogFile != "") - initLibrary(LIB_LOGFILE, LogFile.c_str()); + initLibrary(seed, LIB_LOGFILE, LogFile.c_str()); else - initLibrary(LIB_LOGSTDERR, NULL); // Use STDERR for logging + initLibrary(seed, LIB_LOGSTDERR, NULL); // Use STDERR for logging unique_ptr> space(SpaceFactoryRegistry::Instance().CreateSpace(SpaceType, *SpaceParams));