From c6ac418c21986e137d0560d248f4997c15c7a1c8 Mon Sep 17 00:00:00 2001 From: searchivairus Date: Sun, 28 Jan 2018 21:13:47 -0500 Subject: [PATCH] updates for #249 and sort of a fix for #274 --- similarity_search/include/experiments.h | 177 +++++------------- .../include/ported_boost_progress.h | 5 + similarity_search/include/sort_arr_bi.h | 21 ++- similarity_search/src/method/hnsw.cc | 14 +- 4 files changed, 73 insertions(+), 144 deletions(-) diff --git a/similarity_search/include/experiments.h b/similarity_search/include/experiments.h index 4134002..b9d077a 100644 --- a/similarity_search/include/experiments.h +++ b/similarity_search/include/experiments.h @@ -43,6 +43,7 @@ #include "eval_results.h" #include "meta_analysis.h" #include "query_creator.h" +#include "thread_pool.h" namespace similarity { @@ -104,102 +105,6 @@ class Experiments { if (LogInfo) LOG(LIB_INFO) << "experiment done at " << LibGetCurrentTime(); } - template - struct BenchmarkThreadParams { - BenchmarkThreadParams( - mutex& UpdateStat, - unsigned ThreadQty, - unsigned QueryPart, - size_t TestSetId, - std::vector& ExpRes, - const ExperimentConfig& config, - const QueryCreatorType& QueryCreator, - const Index& Method, - unsigned MethNum, - vector& SearchTime, - vector& AvgNumDistComp, - vector& max_result_size, - vector& avg_result_size, - vector& DistCompQty) : - UpdateStat_(UpdateStat), - ThreadQty_(ThreadQty), - QueryPart_(QueryPart), - TestSetId_(TestSetId), - ExpRes_(ExpRes), - config_(config), - QueryCreator_(QueryCreator), - Method_(Method), - MethNum_(MethNum), - SearchTime_(SearchTime), - - AvgNumDistComp_(AvgNumDistComp), - max_result_size_(max_result_size), - avg_result_size_(avg_result_size), - DistCompQty_(DistCompQty) - {} - - mutex& UpdateStat_; - unsigned ThreadQty_; - unsigned QueryPart_; - size_t TestSetId_; - std::vector& ExpRes_; - const ExperimentConfig& config_; - const QueryCreatorType& QueryCreator_; - const Index& Method_; - unsigned MethNum_; - vector& SearchTime_; - - 
vector& AvgNumDistComp_; - vector& max_result_size_; - vector& avg_result_size_; - vector& DistCompQty_; - - vector queryIds; - vector> queries; // queries with results - }; - - template - struct BenchmarkThread { - void operator ()(BenchmarkThreadParams& prm) { - size_t numquery = prm.config_.GetQueryObjects().size(); - - WallClockTimer wtm; - - wtm.reset(); - - unsigned MethNum = prm.MethNum_; - unsigned QueryPart = prm.QueryPart_; - unsigned ThreadQty = prm.ThreadQty_; - - for (size_t q = 0; q < numquery; ++q) { - if ((q % ThreadQty) == QueryPart) { - unique_ptr query(prm.QueryCreator_(prm.config_.GetSpace(), - prm.config_.GetQueryObjects()[q])); - uint64_t t1 = wtm.split(); - prm.Method_.Search(query.get()); - uint64_t t2 = wtm.split(); - - { - lock_guard g(prm.UpdateStat_); - - prm.ExpRes_[MethNum]->AddDistComp(prm.TestSetId_, query->DistanceComputations()); - prm.ExpRes_[MethNum]->AddQueryTime(prm.TestSetId_, (1.0*t2 - t1)/1e3); - - - prm.DistCompQty_[MethNum] += query->DistanceComputations(); - prm.avg_result_size_[MethNum] += query->ResultSize(); - - if (query->ResultSize() > prm.max_result_size_[MethNum]) { - prm.max_result_size_[MethNum] = query->ResultSize(); - } - - prm.queryIds.push_back(q); - prm.queries.push_back(std::move(query)); - } - } - } - } - }; template static void Execute(bool LogInfo, unsigned ThreadTestQty, size_t TestSetId, @@ -259,41 +164,51 @@ class Experiments { if (!ThreadTestQty) ThreadTestQty = 1; - vector*> ThreadParams(ThreadTestQty); - vector Threads(ThreadTestQty); - AutoVectDel> DelThreadParams(ThreadParams); + vector> QueryIds; + vector>> Queries; // queries with results + QueryIds.resize(ThreadTestQty); + Queries.resize(ThreadTestQty); - for (unsigned QueryPart = 0; QueryPart < ThreadTestQty; ++QueryPart) { - ThreadParams[QueryPart] = new BenchmarkThreadParams( - UpdateStat, - ThreadTestQty, - QueryPart, - TestSetId, - ExpRes, - config, - QueryCreator, - Method, - MethNum, - SearchTime, - AvgNumDistComp, - max_result_size, - 
avg_result_size, - DistCompQty); - } + /* + * Because each thread uses its own parameter set, we must use + * exactly ThreadTestQty sets. + */ + ParallelFor(0, ThreadTestQty, ThreadTestQty, [&](unsigned QueryPart) { + size_t numquery = config.GetQueryObjects().size(); - if (ThreadTestQty> 1) { - for (unsigned QueryPart = 0; QueryPart < ThreadTestQty; ++QueryPart) { - Threads[QueryPart] = std::thread(BenchmarkThread(), - ref(*ThreadParams[QueryPart])); - } - for (unsigned QueryPart = 0; QueryPart < ThreadTestQty; ++QueryPart) { - Threads[QueryPart].join(); + WallClockTimer wtm; + + wtm.reset(); + + for (size_t q = 0; q < numquery; ++q) { + if ((q % ThreadTestQty) == QueryPart) { + unique_ptr query(QueryCreator(config.GetSpace(), + config.GetQueryObjects()[q])); + uint64_t t1 = wtm.split(); + Method.Search(query.get()); + uint64_t t2 = wtm.split(); + + { + lock_guard g(UpdateStat); + + ExpRes[MethNum]->AddDistComp(TestSetId, query->DistanceComputations()); + ExpRes[MethNum]->AddQueryTime(TestSetId, (1.0*t2 - t1)/1e3); + + + DistCompQty[MethNum] += query->DistanceComputations(); + avg_result_size[MethNum] += query->ResultSize(); + + if (query->ResultSize() > max_result_size[MethNum]) { + max_result_size[MethNum] = query->ResultSize(); + } + + QueryIds[QueryPart].push_back(q); + Queries[QueryPart].push_back(std::move(query)); + } + } } - } else { - CHECK(ThreadTestQty == 1); - BenchmarkThread()(*ThreadParams[0]); - } + }); wtm.split(); @@ -309,11 +224,9 @@ class Experiments { if (LogInfo) LOG(LIB_INFO) << ">>>> Computing effectiveness metrics for " << Method.StrDesc(); for (unsigned QueryPart = 0; QueryPart < ThreadTestQty; ++QueryPart) { - const BenchmarkThreadParams* params = ThreadParams[QueryPart]; - - for (size_t qi = 0; qi < params->queries.size(); ++qi) { - size_t q = params->queryIds[qi] ; - const QueryType* pQuery = params->queries[qi].get(); + for (size_t qi = 0; qi < Queries[QueryPart].size(); ++qi) { + size_t q = QueryIds[QueryPart][qi] ; + const QueryType* 
pQuery = Queries[QueryPart][qi].get(); unique_ptr queryGS(QueryCreator(config.GetSpace(), config.GetQueryObjects()[q])); diff --git a/similarity_search/include/ported_boost_progress.h b/similarity_search/include/ported_boost_progress.h index 773309b..a6d7d0a 100644 --- a/similarity_search/include/ported_boost_progress.h +++ b/similarity_search/include/ported_boost_progress.h @@ -69,6 +69,11 @@ class ProgressDisplay { return _count; } + // Effects: increments enough to display a complete progress + void finish() { + operator+=(expected_count() - count()); + } + unsigned long operator++() { return operator+=( 1 ); } unsigned long count() const { return _count; } unsigned long expected_count() const { return _expected_count; } diff --git a/similarity_search/include/sort_arr_bi.h b/similarity_search/include/sort_arr_bi.h index 6ced26f..ec01c90 100644 --- a/similarity_search/include/sort_arr_bi.h +++ b/similarity_search/include/sort_arr_bi.h @@ -74,7 +74,8 @@ class SortArrBI { } void sort() { - _mm_prefetch(&v_[0], _MM_HINT_T0); + if (!v_.empty()) + _mm_prefetch(&v_[0], _MM_HINT_T0); std::sort(v_.begin(), v_.begin() + num_elems_); } @@ -86,7 +87,7 @@ class SortArrBI { // it also assumes a non-empty array size_t push_or_replace_non_empty(const KeyType& key, const DataType& data) { // num_elems_ > 0 - unsigned curr = num_elems_ - 1; + size_t curr = num_elems_ - 1; if (v_[curr].key <= key) { if (num_elems_ < v_.size()) { v_[num_elems_].used = false; @@ -99,7 +100,7 @@ class SortArrBI { } while (curr > 0) { - unsigned j = curr - 1; + size_t j = curr - 1; if (v_[j].key <= key) break; curr = j; } @@ -107,7 +108,9 @@ class SortArrBI { if (num_elems_ < v_.size()) num_elems_++; // curr + 1 <= num_elems_ _mm_prefetch((char *)&v_[curr], _MM_HINT_T0); - memmove((char *)&v_[curr+1], &v_[curr], (num_elems_ - (1 + curr)) * sizeof(v_[0])); + + if (num_elems_ - (1 + curr) > 0) + memmove((char *)&v_[curr+1], &v_[curr], (num_elems_ - (1 + curr)) * sizeof(v_[0])); v_[curr].used = false; 
v_[curr].key = key; @@ -150,9 +153,11 @@ class SortArrBI { return ret; } + // Checking for duplicate IDs isn't the responsibility of this function + // it also assumes a non-empty array size_t push_or_replace_non_empty_exp(const KeyType& key, const DataType& data) { // num_elems_ > 0 - unsigned curr = num_elems_ - 1; + size_t curr = num_elems_ - 1; if (v_[curr].key <= key) { if (num_elems_ < v_.size()) { v_[num_elems_].used = false; @@ -163,9 +168,9 @@ class SortArrBI { return num_elems_; } } - unsigned prev = curr; + size_t prev = curr; - unsigned d=1; + size_t d=1; // always curr >= d while (curr > 0 && v_[curr].key > key) { prev = curr; @@ -182,7 +187,7 @@ class SortArrBI { if (num_elems_ < v_.size()) num_elems_++; // curr + 1 <= num_elems_ - if(num_elems_ - (1 + curr) > 0) + if (num_elems_ - (1 + curr) > 0) memmove(&v_[curr+1], &v_[curr], (num_elems_ - (1 + curr)) * sizeof(v_[0])); diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc index 9a2dc1a..eac7cf1 100644 --- a/similarity_search/src/method/hnsw.cc +++ b/similarity_search/src/method/hnsw.cc @@ -213,10 +213,12 @@ namespace similarity { { unique_lock lock(ElListGuard_); ElList_[id] = node; + if (progress_bar) + ++(*progress_bar); } - if (progress_bar) - ++(*progress_bar); }); + if (progress_bar) + progress_bar->finish(); if (post_ == 1 || post_ == 2) { vector temp; @@ -239,9 +241,11 @@ namespace similarity { { unique_lock lock(ElListGuard_); ElList_[id] = node; + if (progress_bar1) + ++(*progress_bar1); } - if (progress_bar1) - ++(*progress_bar1); }); + if (progress_bar1) + progress_bar1->finish(); int maxF = 0; @@ -1209,7 +1213,9 @@ namespace similarity { for (auto iter = neighbor.begin(); iter != neighbor.end(); ++iter) { _mm_prefetch((char *)(*iter)->getData(), _MM_HINT_T0); - _mm_prefetch((char *)(massVisited + (*iter)->getId()), _MM_HINT_T0); + IdType curId = (*iter)->getId(); + CHECK(curId >= 0 && curId < this->data_.size()); + _mm_prefetch((char *)(massVisited + curId), _MM_HINT_T0); } 
// calculate distance to each neighbor for (auto iter = neighbor.begin(); iter != neighbor.end(); ++iter) {