From 790e5aae4afa88e0c8d1366d48e7c8083d3fa60e Mon Sep 17 00:00:00 2001 From: searchivairus Date: Sun, 18 Feb 2018 00:21:51 -0500 Subject: [PATCH] Multiple relatively minor changes. --- python_bindings/setup.py | 2 +- sample_standalone_app/makefile | 14 +++- .../sample_standalone_app1.cc | 8 +- .../sample_standalone_app2.cc | 3 +- similarity_search/include/experiments.h | 2 +- similarity_search/include/thread_pool.h | 76 ++++++++++--------- similarity_search/src/method/hnsw.cc | 6 +- similarity_search/test/test_thread_pool.cc | 4 +- 8 files changed, 66 insertions(+), 49 deletions(-) diff --git a/python_bindings/setup.py b/python_bindings/setup.py index 73bf495..4df18c4 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -127,7 +127,7 @@ def build_extensions(self): name='nmslib', version=__version__, description='Non-Metric Space Library (NMSLIB)', - author='Leonid Boytsov', + author='Bilegsaikhan Naidan, Leonid Boytsov, Yury Malkov, Ben Frederickson, David Novak, and others', url='https://github.com/searchivarius/nmslib', long_description="""Non-Metric Space Library (NMSLIB) is an efficient cross-platform similarity search library and a toolkit for evaluation of similarity search methods. The diff --git a/sample_standalone_app/makefile b/sample_standalone_app/makefile index 548ab12..ca8cf72 100644 --- a/sample_standalone_app/makefile +++ b/sample_standalone_app/makefile @@ -1,10 +1,16 @@ -# Set these variables if you installed the library to a non-standard location -#NON_METRIC_SPACE_LIBRARY=/home/leo/NonMetrLibDebug -NON_METRIC_SPACE_LIBRARY=$(HOME)/NonMetrLibRelease +# Set these variables if you installed the library to your home directory +#NON_METRIC_SPACE_LIBRARY=$(HOME)/NonMetrLibRelease NON_METRIC_SPACE_LIBRARY_INC=$(NON_METRIC_SPACE_LIBRARY)/include NON_METRIC_SPACE_LIBRARY_LIB=$(NON_METRIC_SPACE_LIBRARY)/lib -LIB_GSL=-lgsl -lgslcblas -llshkit +# These lines are used to build against the library built in the current directory +NON_METRIC_SPACE_LIBRARY=../similarity_search/ +NON_METRIC_SPACE_LIBRARY_INC=$(NON_METRIC_SPACE_LIBRARY)/include +NON_METRIC_SPACE_LIBRARY_LIB=$(NON_METRIC_SPACE_LIBRARY)/release + + +# You can uncomment this, if the library was built with extras +#LIB_GSL=-lgsl -lgslcblas -llshkit CXXFLAGS += -I$(NON_METRIC_SPACE_LIBRARY_INC) # Enable C++11 diff --git a/sample_standalone_app/sample_standalone_app1.cc b/sample_standalone_app/sample_standalone_app1.cc index 9e0d507..964729a 100644 --- a/sample_standalone_app/sample_standalone_app1.cc +++ b/sample_standalone_app/sample_standalone_app1.cc @@ -160,17 +160,19 @@ int main(int argc, char* argv[]) { cout << "We have the space and the query, let's create some search index." << endl; + int seed = 0; + /* * Init library, specify a log file */ if (LOG_OPTION == 1) - initLibrary(LIB_LOGFILE, "logfile.txt"); + initLibrary(seed, LIB_LOGFILE, "logfile.txt"); // No logging if (LOG_OPTION == 2) - initLibrary(LIB_LOGNONE, NULL); + initLibrary(seed, LIB_LOGNONE, NULL); // Use STDERR if (LOG_OPTION == 3) - initLibrary(LIB_LOGSTDERR, NULL); + initLibrary(seed, LIB_LOGSTDERR, NULL); AnyParams IndexParams( { diff --git a/sample_standalone_app/sample_standalone_app2.cc b/sample_standalone_app/sample_standalone_app2.cc index 4f1f421..7435240 100644 --- a/sample_standalone_app/sample_standalone_app2.cc +++ b/sample_standalone_app/sample_standalone_app2.cc @@ -92,8 +92,9 @@ int main(int argc, char* argv[]) { const char *dataFile = argv[2]; const char *queryFile = argv[3]; + int seed = 0; - initLibrary(LIB_LOGSTDERR, NULL); + initLibrary(seed, LIB_LOGSTDERR, NULL); // Create an instance of our custom space that uses L2-distance AnyParams empty; diff --git a/similarity_search/include/experiments.h b/similarity_search/include/experiments.h index 860ae74..b961ec8 100644 --- a/similarity_search/include/experiments.h +++ b/similarity_search/include/experiments.h @@ -172,7 +172,7 @@ class Experiments { * Because each thread uses its own parameter set, we must use * exactly ThreadTestQty sets. */ - ParallelFor(0, ThreadTestQty, ThreadTestQty, [&](unsigned QueryPart) { + ParallelFor(0, ThreadTestQty, ThreadTestQty, [&](unsigned QueryPart, unsigned ThreadId) { size_t numquery = config.GetQueryObjects().size(); WallClockTimer wtm; diff --git a/similarity_search/include/thread_pool.h b/similarity_search/include/thread_pool.h index 299a99d..29d5b67 100644 --- a/similarity_search/include/thread_pool.h +++ b/similarity_search/include/thread_pool.h @@ -64,45 +64,53 @@ namespace similarity { numThreads = std::thread::hardware_concurrency(); } - std::vector threads; - std::atomic current(start); + if (numThreads == 1) { + for (size_t id = start; id < end; id++) { + fn(id, 0); + } + } else { + std::vector threads; + std::atomic current(start); - // keep track of exceptions in threads - // https://stackoverflow.com/a/32428427/1713196 - std::exception_ptr lastException = nullptr; - std::mutex lastExceptMutex; + // keep track of exceptions in threads + // https://stackoverflow.com/a/32428427/1713196 + std::exception_ptr lastException = nullptr; + std::mutex lastExceptMutex; - for (size_t i = 0; i < numThreads; ++i) { - threads.push_back(std::thread([&] { - while (true) { - size_t id = current.fetch_add(1); + for (size_t threadId = 0; threadId < numThreads; ++threadId) { + threads.push_back(std::thread([&, threadId] { + while (true) { + size_t id = current.fetch_add(1); - if ((id >= end)) { - break; - } + if ((id >= end)) { + break; + } - try { - fn(id); - } catch (...) { - std::unique_lock lastExcepLock(lastExceptMutex); - lastException = std::current_exception(); - /* - * This will work even when current is the largest value that - * size_t can fit, because fetch_add returns the previous value - * before the increment (what will result in overflow - * and produce 0 instead of current + 1). - */ - current = end; - break; + try { + fn(id, threadId); + } catch (...) { + std::unique_lock lastExcepLock(lastExceptMutex); + lastException = std::current_exception(); + /* + * This will work even when current is the largest value that + * size_t can fit, because fetch_add returns the previous value + * before the increment (what will result in overflow + * and produce 0 instead of current + 1). + */ + current = end; + break; + } } - } - })); - } - for (auto & thread : threads) { - thread.join(); - } - if (lastException) { - std::rethrow_exception(lastException); + })); + } + for (auto & thread : threads) { + thread.join(); + } + if (lastException) { + std::rethrow_exception(lastException); + } } + + } }; diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc index 508ea62..30a7c8a 100644 --- a/similarity_search/src/method/hnsw.cc +++ b/similarity_search/src/method/hnsw.cc @@ -205,7 +205,7 @@ namespace similarity { unique_ptr progress_bar(PrintProgress_ ? new ProgressDisplay(this->data_.size(), cerr) : NULL); - ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id) { + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id, int threadId) { HnswNode *node = new HnswNode(this->data_[id], id); add(&space_, node); { @@ -230,7 +230,7 @@ namespace similarity { /// Making the same index in reverse order unique_ptr progress_bar1(PrintProgress_ ? new ProgressDisplay(this->data_.size(), cerr) : NULL); - ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int pos_id) { + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int pos_id, int threadId) { // reverse ordering (so we iterate decreasing). given // parallelfor, this might not make a difference int id = this->data_.size() - pos_id; @@ -248,7 +248,7 @@ namespace similarity { int maxF = 0; // int degrees[100] = {0}; - ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id) { + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id, int threadId) { HnswNode *node1 = ElList_[id]; HnswNode *node2 = temp[id]; vector f1 = node1->getAllFriends(0); diff --git a/similarity_search/test/test_thread_pool.cc b/similarity_search/test/test_thread_pool.cc index 9d3f3a2..8cfddf4 100644 --- a/similarity_search/test/test_thread_pool.cc +++ b/similarity_search/test/test_thread_pool.cc @@ -19,7 +19,7 @@ namespace similarity { TEST(TestParallelFor) { std::vector squares(1000); - ParallelFor(0, squares.size(), 0, [&](int id) { + ParallelFor(0, squares.size(), 0, [&](int id, int threadId) { squares[id] = id * id; }); for (size_t i = 0; i < squares.size(); ++i) { @@ -33,7 +33,7 @@ TEST(TestParallelForException) { bool has_thrown = false; std::string message = "not gonna do it"; try { - ParallelFor(0, squares.size(), 0, [&](int id) { + ParallelFor(0, squares.size(), 0, [&](int id, int threadId) { if (id == 50) throw std::invalid_argument(message); squares[id] = id * id; });