diff --git a/similarity_search/include/index.h b/similarity_search/include/index.h index 8b69414..b49f5cb 100644 --- a/similarity_search/include/index.h +++ b/similarity_search/include/index.h @@ -50,6 +50,8 @@ class KNNQuery; template class Index { public: + Index(const ObjectVector& data) : data_(data) {} + // Create an index using given parameters virtual void CreateIndex(const AnyParams& indexParams) = 0; // SaveIndex is not necessarily implemented @@ -96,6 +98,11 @@ class Index { bool checkIDs = false/* this is a debug flag only, turning it on may affect performance */) { throw runtime_error("DeleteBatch is not implemented!"); } + + virtual size_t GetSize() const { return data_.size(); } +protected: + const ObjectVector& data_; + private: template void GenericSearch(QueryType* query, IdType) const; diff --git a/similarity_search/include/method/bbtree.h b/similarity_search/include/method/bbtree.h index 57da483..c429224 100644 --- a/similarity_search/include/method/bbtree.h +++ b/similarity_search/include/method/bbtree.h @@ -111,8 +111,6 @@ class BBTree : public Index { DISABLE_COPY_AND_ASSIGN(BBNode); }; - const ObjectVector& data_; - unique_ptr root_node_; size_t BucketSize_; int MaxLeavesToVisit_; diff --git a/similarity_search/include/method/dummy.h b/similarity_search/include/method/dummy.h index 9e560a2..3cdf6f0 100644 --- a/similarity_search/include/method/dummy.h +++ b/similarity_search/include/method/dummy.h @@ -42,7 +42,7 @@ class DummyMethod : public Index { * So, we can memorize them safely. */ DummyMethod(Space& space, - const ObjectVector& data) : data_(data), space_(space) {} + const ObjectVector& data) : Index(data), space_(space) {} /* * This function is supposed to create a search index (or call a @@ -105,7 +105,6 @@ class DummyMethod : public Index { private: bool data_duplicate_; - const ObjectVector& data_; Space& space_; bool bDoSeqSearch_; // disable copy and assign diff --git a/similarity_search/include/method/ghtree.h b/similarity_search/include/method/ghtree.h index 3f2df92..f5aa1d4 100644 --- a/similarity_search/include/method/ghtree.h +++ b/similarity_search/include/method/ghtree.h @@ -77,7 +77,6 @@ class GHTree : public Index { }; const Space& space_; - const ObjectVector& data_; bool use_random_center_; unique_ptr root_; diff --git a/similarity_search/include/method/hnsw.h b/similarity_search/include/method/hnsw.h index a5ea2b8..c1b4305 100644 --- a/similarity_search/include/method/hnsw.h +++ b/similarity_search/include/method/hnsw.h @@ -529,7 +529,6 @@ namespace similarity { unsigned int enterpointId_; unsigned int totalElementsStored_; - const ObjectVector &data_; // We do not copy objects ObjectVector data_rearranged_; VisitedListPool *visitedlistpool; diff --git a/similarity_search/include/method/list_clusters.h b/similarity_search/include/method/list_clusters.h index ffe67d9..62d7882 100644 --- a/similarity_search/include/method/list_clusters.h +++ b/similarity_search/include/method/list_clusters.h @@ -93,7 +93,6 @@ class ListClusters : public Index { }; const Space& space_; - const ObjectVector& data_; std::vector cluster_list_; diff --git a/similarity_search/include/method/multi_index.h b/similarity_search/include/method/multi_index.h index d8cebfc..9d3ca83 100644 --- a/similarity_search/include/method/multi_index.h +++ b/similarity_search/include/method/multi_index.h @@ -61,7 +61,6 @@ class MultiIndex : public Index { std::vector*> indices_; Space& space_; - const ObjectVector& data_; string SpaceType_; bool PrintProgress_; size_t IndexQty_; diff --git a/similarity_search/include/method/multi_vantage_point_tree.h b/similarity_search/include/method/multi_vantage_point_tree.h index e23d3fa..5fe36a3 100644 --- a/similarity_search/include/method/multi_vantage_point_tree.h +++ b/similarity_search/include/method/multi_vantage_point_tree.h @@ -165,7 +165,6 @@ class MultiVantagePointTree : public Index { void GenericSearch(Node* node, QueryType* query, Dists& path, size_t query_path_len, int& MaxLeavesToVisit) const; const Space& space_; - const ObjectVector& data_; unique_ptr root_; // root node size_t MaxPathLength_; // the number of distances for the data diff --git a/similarity_search/include/method/nonmetr_list_clust.h b/similarity_search/include/method/nonmetr_list_clust.h index d340038..014c3ce 100644 --- a/similarity_search/include/method/nonmetr_list_clust.h +++ b/similarity_search/include/method/nonmetr_list_clust.h @@ -46,7 +46,7 @@ class NonMetrListClust : public Index { public: NonMetrListClust(bool printProgress, Space& space, - const ObjectVector& data) : printProgress_(printProgress), data_(data), space_(space) { + const ObjectVector& data) : Index(data), printProgress_(printProgress), space_(space) { maxObjId_ = 0; for (const Object* o: data) { maxObjId_ = max(maxObjId_, o->id()); @@ -79,7 +79,6 @@ class NonMetrListClust : public Index { private: bool printProgress_; - const ObjectVector& data_; Space& space_; size_t db_scan_; diff --git a/similarity_search/include/method/omedrank.h b/similarity_search/include/method/omedrank.h index df7b7f6..24a3815 100644 --- a/similarity_search/include/method/omedrank.h +++ b/similarity_search/include/method/omedrank.h @@ -83,7 +83,6 @@ class OMedRank : public Index { void IndexChunk(size_t chunkId, ProgressDisplay* displayBar); const Space& space_; - const ObjectVector& data_; bool PrintProgress_; size_t num_pivot_; @@ -117,8 +116,8 @@ class OMedRank : public Index { // Heuristics: try to read db_scan_fraction/index_qty entries from each index part // or alternatively K * knn_amp_ entries, for KNN-search size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(db_scan_frac_ * data_.size()); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(db_scan_frac_ * this->data_.size()); } template void GenSearch(QueryType* query, size_t K) const; diff --git a/similarity_search/include/method/perm_bin_vptree.h b/similarity_search/include/method/perm_bin_vptree.h index d977d7e..0723e4e 100644 --- a/similarity_search/include/method/perm_bin_vptree.h +++ b/similarity_search/include/method/perm_bin_vptree.h @@ -60,7 +60,6 @@ class PermBinVPTree : public Index { private: Space& space_; - const ObjectVector& data_; bool PrintProgress_; size_t bin_threshold_; size_t bin_perm_word_qty_; diff --git a/similarity_search/include/method/perm_index_incr_bin.h b/similarity_search/include/method/perm_index_incr_bin.h index 82592c3..836144e 100644 --- a/similarity_search/include/method/perm_index_incr_bin.h +++ b/similarity_search/include/method/perm_index_incr_bin.h @@ -54,7 +54,6 @@ class PermutationIndexIncrementalBin : public Index { private: const Space& space_; - const ObjectVector& data_; bool PrintProgress_; ObjectVector pivot_; @@ -71,8 +70,8 @@ class PermutationIndexIncrementalBin : public Index { std::vector permtable_; size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(db_scan_frac_ * data_.size()); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(db_scan_frac_ * this->data_.size()); } template void GenSearch(QueryType* query, size_t K) const; diff --git a/similarity_search/include/method/perm_lsh_bin.h b/similarity_search/include/method/perm_lsh_bin.h index 4224c5c..1a89505 100644 --- a/similarity_search/include/method/perm_lsh_bin.h +++ b/similarity_search/include/method/perm_lsh_bin.h @@ -52,7 +52,6 @@ class PermutationIndexLSHBin : public Index { void SetQueryTimeParams(const AnyParams &) override {} private: const Space& space_; - const ObjectVector& data_; bool printProgress_; size_t num_pivot_; diff --git a/similarity_search/include/method/permutation_inverted_index.h b/similarity_search/include/method/permutation_inverted_index.h index 8166f7a..381d97b 100644 --- a/similarity_search/include/method/permutation_inverted_index.h +++ b/similarity_search/include/method/permutation_inverted_index.h @@ -55,7 +55,6 @@ class PermutationInvertedIndex : public Index { private: const Space& space_; - const ObjectVector& data_; bool PrintProgress_; float db_scan_frac_; @@ -67,8 +66,8 @@ class PermutationInvertedIndex : public Index { ObjectVector pivot_; size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(db_scan_frac_ * data_.size()); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(db_scan_frac_ * this->data_.size()); } struct ObjectInvEntry { diff --git a/similarity_search/include/method/permutation_prefix_index.h b/similarity_search/include/method/permutation_prefix_index.h index 8028d56..9fe0628 100644 --- a/similarity_search/include/method/permutation_prefix_index.h +++ b/similarity_search/include/method/permutation_prefix_index.h @@ -56,8 +56,8 @@ class PermutationPrefixIndex : public Index { private: size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(min(min_candidate_, data_.size())); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(min(min_candidate_, this->data_.size())); } @@ -65,7 +65,6 @@ class PermutationPrefixIndex : public Index { void GenSearch(QueryType* query, size_t K) const; const Space& space_; - const ObjectVector& data_; bool PrintProgress_; // permutation prefix length (l in the original paper) in (0, num_pivot] diff --git a/similarity_search/include/method/pivot_neighb_invindx.h b/similarity_search/include/method/pivot_neighb_invindx.h index ee583e9..e6b4170 100644 --- a/similarity_search/include/method/pivot_neighb_invindx.h +++ b/similarity_search/include/method/pivot_neighb_invindx.h @@ -85,7 +85,6 @@ class PivotNeighbInvertedIndex : public Index { void SetQueryTimeParams(const AnyParams& QueryTimeParams) override; private: - const ObjectVector& data_; const Space& space_; bool PrintProgress_; bool recreate_points_; @@ -130,11 +129,11 @@ class PivotNeighbInvertedIndex : public Index { ObjectVector genPivot_; // generated pivots size_t computeDbScan(size_t K, size_t chunkQty) const { - size_t totalDbScan = static_cast(db_scan_frac_ * data_.size()); + size_t totalDbScan = static_cast(db_scan_frac_ * this->data_.size()); if (knn_amp_) { totalDbScan = K * knn_amp_; } - totalDbScan = min(totalDbScan, data_.size()); + totalDbScan = min(totalDbScan, this->data_.size()); CHECK_MSG(chunkQty, "Bug or inconsistent parameters: the number of index chunks cannot be zero!"); return (totalDbScan + chunkQty - 1) / chunkQty; } diff --git a/similarity_search/include/method/proj_vptree.h b/similarity_search/include/method/proj_vptree.h index 2f59cc0..454a84e 100644 --- a/similarity_search/include/method/proj_vptree.h +++ b/similarity_search/include/method/proj_vptree.h @@ -53,7 +53,6 @@ class ProjectionVPTree : public Index { Space& space_; - const ObjectVector& data_; bool PrintProgress_; size_t K_; @@ -61,8 +60,8 @@ class ProjectionVPTree : public Index { float db_scan_frac_; size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(db_scan_frac_ * data_.size()); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(db_scan_frac_ * this->data_.size()); } unique_ptr > projObj_; diff --git a/similarity_search/include/method/projection_index_incremental.h b/similarity_search/include/method/projection_index_incremental.h index f6018e4..bff1bcc 100644 --- a/similarity_search/include/method/projection_index_incremental.h +++ b/similarity_search/include/method/projection_index_incremental.h @@ -60,7 +60,6 @@ class ProjectionIndexIncremental : public Index { private: const Space& space_; - const ObjectVector& data_; bool PrintProgress_; float max_proj_dist_; @@ -80,8 +79,8 @@ class ProjectionIndexIncremental : public Index { #endif size_t computeDbScan(size_t K) const { - if (knn_amp_) { return min(K * knn_amp_, data_.size()); } - return static_cast(db_scan_frac_ * data_.size()); + if (knn_amp_) { return min(K * knn_amp_, this->data_.size()); } + return static_cast(db_scan_frac_ * this->data_.size()); } template void GenSearch(QueryType* query, size_t K) const; diff --git a/similarity_search/include/method/seqsearch.h b/similarity_search/include/method/seqsearch.h index 7b92520..d08d295 100644 --- a/similarity_search/include/method/seqsearch.h +++ b/similarity_search/include/method/seqsearch.h @@ -43,9 +43,10 @@ class SeqSearch : public Index { void Search(KNNQuery* query, IdType) const override; void SetQueryTimeParams(const AnyParams& params) override {} + + size_t GetSize() const override { return getData().size(); } private: Space& space_; - const ObjectVector& origData_; char* cacheOptimizedBucket_; ObjectVector* pData_; @@ -53,7 +54,7 @@ class SeqSearch : public Index { IdTypeUnsign threadQty_; vector vvThreadData; - const ObjectVector& getData() const { return pData_ != NULL ? *pData_ : origData_; } + const ObjectVector& getData() const { return pData_ != NULL ? *pData_ : this->data_; } // disable copy and assign DISABLE_COPY_AND_ASSIGN(SeqSearch); }; diff --git a/similarity_search/include/method/simple_inverted_index.h b/similarity_search/include/method/simple_inverted_index.h index 4645916..7dbf61b 100644 --- a/similarity_search/include/method/simple_inverted_index.h +++ b/similarity_search/include/method/simple_inverted_index.h @@ -39,7 +39,7 @@ class SimplInvIndex : public Index { * So, we can memorize them safely. */ SimplInvIndex(Space& space, - const ObjectVector& data) : data_(data), + const ObjectVector& data) : Index(data), pSpace_(dynamic_cast(&space)) { if (pSpace_ == nullptr) { PREPARE_RUNTIME_ERR(err) << @@ -119,7 +119,6 @@ class SimplInvIndex : public Index { : post_(&pl), post_pos_(0), qval_(qval), qval_x_docval_(qval_x_docval) {} }; - const ObjectVector& data_; SpaceSparseNegativeScalarProductFast* pSpace_; std::unordered_map> index_; // disable copy and assign diff --git a/similarity_search/include/method/small_world_rand.h b/similarity_search/include/method/small_world_rand.h index 3a97c4e..6f4fb17 100644 --- a/similarity_search/include/method/small_world_rand.h +++ b/similarity_search/include/method/small_world_rand.h @@ -286,7 +286,6 @@ class SmallWorldRand : public Index { ObjectVector pivots_; const Space& space_; - const ObjectVector& data_; // We don't copy data bool PrintProgress_; bool use_proxy_dist_; diff --git a/similarity_search/include/method/spatial_approx_tree.h b/similarity_search/include/method/spatial_approx_tree.h index ca2074f..2038f59 100644 --- a/similarity_search/include/method/spatial_approx_tree.h +++ b/similarity_search/include/method/spatial_approx_tree.h @@ -56,7 +56,6 @@ class SpatialApproxTree : public Index { class SATNode; const Space& space_; - const ObjectVector data_; unique_ptr root_; }; diff --git a/similarity_search/include/method/vptree.h b/similarity_search/include/method/vptree.h index 5e67204..392e397 100644 --- a/similarity_search/include/method/vptree.h +++ b/similarity_search/include/method/vptree.h @@ -106,7 +106,6 @@ class VPTree : public Index { }; Space& space_; - const ObjectVector& data_; bool PrintProgress_; bool use_random_center_; size_t max_pivot_select_attempts_; diff --git a/similarity_search/src/method/bbtree.cc b/similarity_search/src/method/bbtree.cc index 1305422..2a66ad4 100644 --- a/similarity_search/src/method/bbtree.cc +++ b/similarity_search/src/method/bbtree.cc @@ -43,7 +43,7 @@ using std::unique_ptr; template BBTree::BBTree( const Space& space, - const ObjectVector& data) : data_(data) { + const ObjectVector& data) : Index(data) { BregmanDivSpace_ = BregmanDiv::ConvertFrom(&space); // Should be the special space! } @@ -61,7 +61,7 @@ void BBTree::CreateIndex(const AnyParams& MethParams) { pmgr.CheckUnused(); - root_node_.reset(new BBNode(BregmanDivSpace_, data_, BucketSize_, ChunkBucket_)); + root_node_.reset(new BBNode(BregmanDivSpace_, this->data_, BucketSize_, ChunkBucket_)); } template diff --git a/similarity_search/src/method/dummy.cc b/similarity_search/src/method/dummy.cc index 8984cc6..f08591a 100644 --- a/similarity_search/src/method/dummy.cc +++ b/similarity_search/src/method/dummy.cc @@ -24,8 +24,8 @@ namespace similarity { template void DummyMethod::Search(RangeQuery* query, IdType) const { if (bDoSeqSearch_) { - for (size_t i = 0; i < data_.size(); ++i) { - query->CheckAndAddToResult(data_[i]); + for (size_t i = 0; i < this->data_.size(); ++i) { + query->CheckAndAddToResult(this->data_[i]); } } else { for (int i =0; i < 100000; ++i); @@ -35,8 +35,8 @@ void DummyMethod::Search(RangeQuery* query, IdType) const { template void DummyMethod::Search(KNNQuery* query, IdType) const { if (bDoSeqSearch_) { - for (size_t i = 0; i < data_.size(); ++i) { - query->CheckAndAddToResult(data_[i]); + for (size_t i = 0; i < this->data_.size(); ++i) { + query->CheckAndAddToResult(this->data_[i]); } } else { for (int i =0; i < 100000; ++i); diff --git a/similarity_search/src/method/ghtree.cc b/similarity_search/src/method/ghtree.cc index 99fc178..f1af4e9 100644 --- a/similarity_search/src/method/ghtree.cc +++ b/similarity_search/src/method/ghtree.cc @@ -28,8 +28,8 @@ template GHTree::GHTree(const Space& space, const ObjectVector& data, bool use_random_center) : - space_(space), - data_(data), + Index(data), + space_(space), use_random_center_(use_random_center) { } @@ -46,7 +46,7 @@ void GHTree::CreateIndex(const AnyParams& IndexParams) { pmgr.CheckUnused(); this->ResetQueryTimeParams(); - root_.reset(new GHNode(space_, data_, + root_.reset(new GHNode(space_, this->data_, BucketSize_, ChunkBucket_, use_random_center_ /* random center */)); } diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc index 8f56637..a9bd09d 100644 --- a/similarity_search/src/method/hnsw.cc +++ b/similarity_search/src/method/hnsw.cc @@ -84,9 +84,9 @@ namespace similarity { template Hnsw::Hnsw(bool PrintProgress, const Space &space, const ObjectVector &data) - : space_(space) + : Index(data) + , space_(space) , PrintProgress_(PrintProgress) - , data_(data) , visitedlistpool(nullptr) , enterpoint_(nullptr) , data_level0_memory_(nullptr) @@ -193,24 +193,24 @@ namespace similarity { SetQueryTimeParams(getEmptyParams()); - if (data_.empty()) { + if (this->data_.empty()) { pmgr.CheckUnused(); return; } - ElList_.resize(data_.size()); + ElList_.resize(this->data_.size()); // One entry should be added before all the threads are started, or else add() will not work properly - HnswNode *first = new HnswNode(data_[0], 0 /* id == 0 */); + HnswNode *first = new HnswNode(this->data_[0], 0 /* id == 0 */); first->init(getRandomLevel(mult_), maxM_, maxM0_); maxlevel_ = first->level; enterpoint_ = first; ElList_[0] = first; - visitedlistpool = new VisitedListPool(indexThreadQty_, data_.size()); + visitedlistpool = new VisitedListPool(indexThreadQty_, this->data_.size()); - unique_ptr progress_bar(PrintProgress_ ? new ProgressDisplay(data_.size(), cerr) : NULL); + unique_ptr progress_bar(PrintProgress_ ? new ProgressDisplay(this->data_.size(), cerr) : NULL); - ParallelFor(1, data_.size(), indexThreadQty_, [&](int id) { - HnswNode *node = new HnswNode(data_[id], id); + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id) { + HnswNode *node = new HnswNode(this->data_[id], id); add(&space_, node); { unique_lock lock(ElListGuard_); @@ -223,20 +223,20 @@ namespace similarity { if (post_ == 1 || post_ == 2) { vector temp; temp.swap(ElList_); - ElList_.resize(data_.size()); - first = new HnswNode(data_[0], 0 /* id == 0 */); + ElList_.resize(this->data_.size()); + first = new HnswNode(this->data_[0], 0 /* id == 0 */); first->init(getRandomLevel(mult_), maxM_, maxM0_); maxlevel_ = first->level; enterpoint_ = first; ElList_[0] = first; /// Making the same index in reverse order - unique_ptr progress_bar1(PrintProgress_ ? new ProgressDisplay(data_.size(), cerr) : NULL); + unique_ptr progress_bar1(PrintProgress_ ? new ProgressDisplay(this->data_.size(), cerr) : NULL); - ParallelFor(1, data_.size(), indexThreadQty_, [&](int pos_id) { + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int pos_id) { // reverse ordering (so we iterate decreasing). given // parallelfor, this might not make a difference - int id = data_.size() - pos_id; - HnswNode *node = new HnswNode(data_[id], id); + int id = this->data_.size() - pos_id; + HnswNode *node = new HnswNode(this->data_[id], id); add(&space_, node); { unique_lock lock(ElListGuard_); @@ -248,7 +248,7 @@ namespace similarity { int maxF = 0; // int degrees[100] = {0}; - ParallelFor(1, data_.size(), indexThreadQty_, [&](int id) { + ParallelFor(1, this->data_.size(), indexThreadQty_, [&](int id) { HnswNode *node1 = ElList_[id]; HnswNode *node2 = temp[id]; vector f1 = node1->getAllFriends(0); diff --git a/similarity_search/src/method/list_clusters.cc b/similarity_search/src/method/list_clusters.cc index f325b8d..7db7922 100644 --- a/similarity_search/src/method/list_clusters.cc +++ b/similarity_search/src/method/list_clusters.cc @@ -55,7 +55,7 @@ ListClusters::SetQueryTimeParams(const AnyParams& QueryTimeParams) { template ListClusters::ListClusters( const Space& space, - const ObjectVector& data) : space_(space), data_(data) { } + const ObjectVector& data) : Index(data), space_(space) { } template void ListClusters::CreateIndex(const AnyParams& IndexParams) @@ -93,7 +93,7 @@ void ListClusters::CreateIndex(const AnyParams& IndexParams) // DistObjectPairVector remaining; - for (const auto& object : data_) { + for (const auto& object : this->data_) { remaining.push_back(std::make_pair(0, object)); } diff --git a/similarity_search/src/method/multi_index.cc b/similarity_search/src/method/multi_index.cc index 01a6b70..aaf6f33 100644 --- a/similarity_search/src/method/multi_index.cc +++ b/similarity_search/src/method/multi_index.cc @@ -36,7 +36,7 @@ MultiIndex::MultiIndex( bool PrintProgress, const string& SpaceType, Space& space, - const ObjectVector& data) : space_(space), data_(data), SpaceType_(SpaceType), PrintProgress_(PrintProgress) {} + const ObjectVector& data) : Index(data), space_(space), SpaceType_(SpaceType), PrintProgress_(PrintProgress) {} template @@ -54,7 +54,7 @@ void MultiIndex::CreateIndex(const AnyParams& IndexParams) { MethodName_, SpaceType_, space_, - data_)); + this->data_)); indices_.back()->CreateIndex(RemainParams); } diff --git a/similarity_search/src/method/multi_vantage_point_tree.cc b/similarity_search/src/method/multi_vantage_point_tree.cc index 2ac943e..12bc91a 100644 --- a/similarity_search/src/method/multi_vantage_point_tree.cc +++ b/similarity_search/src/method/multi_vantage_point_tree.cc @@ -28,7 +28,7 @@ namespace similarity { template MultiVantagePointTree::MultiVantagePointTree( const Space& space, - const ObjectVector& data) : space_(space), data_(data) { + const ObjectVector& data) : Index(data), space_(space) { } template @@ -50,9 +50,9 @@ void MultiVantagePointTree::CreateIndex(const AnyParams& IndexParams) { } Entries entries; - entries.reserve(data_.size()); - for (size_t i = 0; i < data_.size(); ++i) { - entries.push_back(Entry(data_[i])); + entries.reserve(this->data_.size()); + for (size_t i = 0; i < this->data_.size(); ++i) { + entries.push_back(Entry(this->data_[i])); } root_.reset(BuildTree(&space_, entries)); } diff --git a/similarity_search/src/method/nonmetr_list_clust.cc b/similarity_search/src/method/nonmetr_list_clust.cc index c5cfc2e..c5df117 100644 --- a/similarity_search/src/method/nonmetr_list_clust.cc +++ b/similarity_search/src/method/nonmetr_list_clust.cc @@ -43,7 +43,7 @@ void NonMetrListClust::CreateIndex(const AnyParams& IndexParams) { LOG(LIB_INFO) << "searchCloseIterQty=" << searchCloseIterQty; LOG(LIB_INFO) << "sampleDistQty=" << sampleDistQty; - ClusterUtils::doFIRMAL(printProgress_, space_, data_, centerQty, vCenters_, vClusterAssign_, vUnassigned_, + ClusterUtils::doFIRMAL(printProgress_, space_, this->data_, centerQty, vCenters_, vClusterAssign_, vUnassigned_, searchCloseIterQty, sampleDistQty, true /* do use all previous clusters in each iteration */); } else if (clusterType == CLUST_TYPE_CLARAN) { size_t randRestQty = CLARANS_RAND_RESTART_QTY; @@ -54,7 +54,7 @@ void NonMetrListClust::CreateIndex(const AnyParams& IndexParams) { pmgr.GetParamOptional("inClusterSampleQty", inClusterSampleQty, inClusterSampleQty); LOG(LIB_INFO) << "randRestQty=" << randRestQty; - ClusterUtils::doCLARANS(printProgress_, space_, data_, centerQty, vCenters_, vClusterAssign_, + ClusterUtils::doCLARANS(printProgress_, space_, this->data_, centerQty, vCenters_, vClusterAssign_, inClusterSwapAttempts, inClusterSampleQty, randRestQty); } else if (clusterType == CLUST_TYPE_REDUCT_CLARAN) { size_t inClusterSwapAttempts = CLARANS_SWAP_ATTEMPTS; @@ -68,7 +68,7 @@ void NonMetrListClust::CreateIndex(const AnyParams& IndexParams) { pmgr.GetParamOptional("keepFrac", keepFrac, keepFrac); LOG(LIB_INFO) << "maxMetaIterQty = " << maxMetaIterQty; LOG(LIB_INFO) << "keepFrac = " << keepFrac; - ClusterUtils::doReductiveCLARANS(printProgress_, space_, data_, + ClusterUtils::doReductiveCLARANS(printProgress_, space_, this->data_, maxMetaIterQty, keepFrac, centerQty, vCenters_, vClusterAssign_, vUnassigned_, inClusterSwapAttempts, inClusterSampleQty); @@ -139,7 +139,7 @@ NonMetrListClust::SetQueryTimeParams(const AnyParams& QueryTimeParams) { // Note that GetParamOptional() should always have a default value pmgr.GetParamOptional("dbScanFrac", dbScanFrac, 0.1); CHECK_MSG(dbScanFrac > 0 && dbScanFrac <= 1, "dbScanFrac should be >0 and <=1"); - db_scan_ = size_t(ceil(dbScanFrac * data_.size())); + db_scan_ = size_t(ceil(dbScanFrac * this->data_.size())); LOG(LIB_INFO) << "db_scan=" << db_scan_; pmgr.CheckUnused(); } diff --git a/similarity_search/src/method/omedrank.cc b/similarity_search/src/method/omedrank.cc index 3121b33..79ba3a2 100644 --- a/similarity_search/src/method/omedrank.cc +++ b/similarity_search/src/method/omedrank.cc @@ -38,7 +38,7 @@ OMedRank::OMedRank( bool PrintProgress, const Space& space, const ObjectVector& data) : - space_(space), data_(data), PrintProgress_(PrintProgress), + Index(data), space_(space), PrintProgress_(PrintProgress), index_qty_(0) // If ComputeDbScan is called before index_qty_ is computed, it will see this zero { } @@ -58,7 +58,7 @@ void OMedRank::CreateIndex(const AnyParams &IndexParams) { projection_.reset(Projection::createProjection( space_, - data_, + this->data_, proj_type_, interm_dim_, num_pivot_, @@ -70,7 +70,7 @@ void OMedRank::CreateIndex(const AnyParams &IndexParams) { " distance value type: '" + DistTypeName() + "'"); } - index_qty_ = (data_.size() + chunk_index_size_ - 1) / chunk_index_size_; + index_qty_ = (this->data_.size() + chunk_index_size_ - 1) / chunk_index_size_; pmgr.CheckUnused(); @@ -87,7 +87,7 @@ void OMedRank::CreateIndex(const AnyParams &IndexParams) { } unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); for (size_t chunkId = 0; chunkId < index_qty_; ++chunkId) { @@ -181,7 +181,7 @@ void OMedRank::GenSearch(QueryType* query, size_t K) const { size_t minMatchPivotQty = max(size_t(1), static_cast(round(min_freq_ * num_pivot_search_))); size_t minId = chunkId * chunk_index_size_; - size_t maxId = min(data_.size(), minId + chunk_index_size_); + size_t maxId = min(this->data_.size(), minId + chunk_index_size_); size_t chunkQty = (maxId - minId); CHECK(chunkQty <= chunk_index_size_); @@ -210,7 +210,7 @@ void OMedRank::GenSearch(QueryType* query, size_t K) const { if (freq == minMatchPivotQty) { // Add only the first time when we exceeded the threshold! ++scannedQty; - if (!skip_check_) query->CheckAndAddToResult(data_[objIdDiff + minId]); + if (!skip_check_) query->CheckAndAddToResult(this->data_[objIdDiff + minId]); } eof = false; } @@ -262,7 +262,7 @@ void OMedRank::SetQueryTimeParams(const AnyParams& QueryTimeParams) { template void OMedRank::IndexChunk(size_t chunkId, ProgressDisplay* displayBar) { size_t minId = chunkId * chunk_index_size_; - size_t maxId = min(data_.size(), minId + chunk_index_size_); + size_t maxId = min(this->data_.size(), minId + chunk_index_size_); auto & chunkPostLists = *posting_lists_[chunkId]; chunkPostLists.resize(num_pivot_); @@ -273,7 +273,7 @@ void OMedRank::IndexChunk(size_t chunkId, ProgressDisplay* displayBar) { for (size_t i = 0; i < maxId - minId; ++i) { IdType id = minId + i; - projection_->compProj(NULL, data_[id], &projDists[0]); + projection_->compProj(NULL, this->data_[id], &projDists[0]); for (size_t j = 0; j < num_pivot_; ++j) { /* diff --git a/similarity_search/src/method/perm_bin_vptree.cc b/similarity_search/src/method/perm_bin_vptree.cc index d872c85..0d2c03e 100644 --- a/similarity_search/src/method/perm_bin_vptree.cc +++ b/similarity_search/src/method/perm_bin_vptree.cc @@ -38,7 +38,7 @@ PermBinVPTree::PermBinVPTree( bool PrintProgress, Space& space, const ObjectVector& data) : - space_(space), data_(data), + Index(data), space_(space), PrintProgress_(PrintProgress), VPTreeSpace_(new SpaceBitHamming()) {} @@ -62,12 +62,12 @@ void PermBinVPTree::CreateIndex(const AnyParams& Ind AnyParams RemainParams = pmgr.ExtractParametersExcept({ "numPivot", "binThreshold"}); - GetPermutationPivot(data_, space_, NumPivot, &pivots_); - BinPermData_.resize(data_.size()); + GetPermutationPivot(this->data_, space_, NumPivot, &pivots_); + BinPermData_.resize(this->data_.size()); - for (size_t i = 0; i < data_.size(); ++i) { + for (size_t i = 0; i < this->data_.size(); ++i) { Permutation TmpPerm; - GetPermutation(pivots_, space_, data_[i], &TmpPerm); + GetPermutation(pivots_, space_, this->data_[i], &TmpPerm); vector binPivot; Binarize(TmpPerm, bin_threshold_, binPivot); CHECK(binPivot.size() == bin_perm_word_qty_); @@ -105,7 +105,7 @@ void PermBinVPTree::SetQueryTimeParams(const AnyPara LOG(LIB_INFO) << "Set query-time parameters for PermBinVPTree:"; LOG(LIB_INFO) << "dbScanFrac=" << dbScanFrac; - db_scan_qty_ = max(size_t(1), static_cast(dbScanFrac * data_.size())); + db_scan_qty_ = max(size_t(1), static_cast(dbScanFrac * this->data_.size())); LOG(LIB_INFO) << "db_scan_qty_=" << db_scan_qty_; @@ -115,7 +115,7 @@ void PermBinVPTree::SetQueryTimeParams(const AnyPara template PermBinVPTree::~PermBinVPTree() { - for (size_t i = 0; i < data_.size(); ++i) { + for (size_t i = 0; i < this->data_.size(); ++i) { delete BinPermData_[i]; } } @@ -146,7 +146,7 @@ void PermBinVPTree::Search(RangeQuery* query while (!ResQueue->Empty()) { size_t id = reinterpret_cast(ResQueue->TopObject())->id(); - query->CheckAndAddToResult(data_[id]); + query->CheckAndAddToResult(this->data_[id]); ResQueue->Pop(); } } @@ -170,7 +170,7 @@ void PermBinVPTree::Search(KNNQuery* query, while (!ResQueue->Empty()) { size_t id = reinterpret_cast(ResQueue->TopObject())->id(); - query->CheckAndAddToResult(data_[id]); + query->CheckAndAddToResult(this->data_[id]); ResQueue->Pop(); } } diff --git a/similarity_search/src/method/perm_index_incr_bin.cc b/similarity_search/src/method/perm_index_incr_bin.cc index ea9e1b9..869d4f0 100644 --- a/similarity_search/src/method/perm_index_incr_bin.cc +++ b/similarity_search/src/method/perm_index_incr_bin.cc @@ -33,7 +33,7 @@ PermutationIndexIncrementalBin::PermutationIndexIncrementalBi bool PrintProgress, const Space& space, const ObjectVector& data) - : space_(space), data_(data), PrintProgress_(PrintProgress) {} + : Index(data), space_(space), PrintProgress_(PrintProgress) {} template void PermutationIndexIncrementalBin::CreateIndex(const AnyParams& IndexParams) { @@ -51,17 +51,17 @@ void PermutationIndexIncrementalBin::CreateIndex(const AnyPar LOG(LIB_INFO) << "# binarization threshold = " << bin_threshold_; LOG(LIB_INFO) << "# binary entry size (words) = " << bin_perm_word_qty_; - GetPermutationPivot(data_, space_, num_pivot_, &pivot_); + GetPermutationPivot(this->data_, space_, num_pivot_, &pivot_); - permtable_.resize(data_.size() * bin_perm_word_qty_); + permtable_.resize(this->data_.size() * bin_perm_word_qty_); unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += bin_perm_word_qty_) { + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += bin_perm_word_qty_) { Permutation TmpPerm; - GetPermutation(pivot_, space_, data_[i], &TmpPerm); + GetPermutation(pivot_, space_, this->data_[i], &TmpPerm); CHECK(TmpPerm.size() == num_pivot_); vector binPivot; Binarize(TmpPerm, bin_threshold_, binPivot); @@ -133,10 +133,10 @@ void PermutationIndexIncrementalBin::GenSearch(QueryType* que Binarize(perm_q, bin_threshold_, binPivot); std::vector perm_dists; - perm_dists.reserve(data_.size()); + perm_dists.reserve(this->data_.size()); if (use_sort_) { - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += bin_perm_word_qty_) { + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += bin_perm_word_qty_) { perm_dists.push_back(std::make_pair(BitHamming(&permtable_[start], &binPivot[0], bin_perm_word_qty_), i)); } @@ -144,12 +144,12 @@ void PermutationIndexIncrementalBin::GenSearch(QueryType* que for (size_t i = 0; i < db_scan; ++i) { const size_t idx = quick_select.GetNext().second; quick_select.Next(); - if (!skip_checking_) query->CheckAndAddToResult(data_[idx]); + if (!skip_checking_) query->CheckAndAddToResult(this->data_[idx]); } } else { - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += bin_perm_word_qty_) { + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += bin_perm_word_qty_) { if (BitHamming(&permtable_[start], &binPivot[0], bin_perm_word_qty_) < max_hamming_dist_) { - if (!skip_checking_) query->CheckAndAddToResult(data_[i]); + if (!skip_checking_) query->CheckAndAddToResult(this->data_[i]); } } } diff --git a/similarity_search/src/method/perm_lsh_bin.cc b/similarity_search/src/method/perm_lsh_bin.cc index fd76bd3..e668713 100644 --- a/similarity_search/src/method/perm_lsh_bin.cc +++ b/similarity_search/src/method/perm_lsh_bin.cc @@ -32,7 +32,7 @@ PermutationIndexLSHBin::PermutationIndexLSHBin( bool PrintProgress, const Space& space, const ObjectVector& data) : - space_(space), data_(data), printProgress_(PrintProgress) {} + Index(data), space_(space), printProgress_(PrintProgress) {} template void PermutationIndexLSHBin::CreateIndex(const AnyParams& IndexParams) { @@ -68,7 +68,7 @@ void PermutationIndexLSHBin::CreateIndex(const AnyParams& IndexParams) { bit_sample_flags_.resize(num_hash_); for (size_t i = 0; i < num_hash_; ++i) { - GetPermutationPivot(data_, space_, num_pivot_, &pivots_[i]); + GetPermutationPivot(this->data_, space_, num_pivot_, &pivots_[i]); bit_sample_flags_[i].resize(num_pivot_); @@ -113,11 +113,11 @@ void PermutationIndexLSHBin::CreateIndex(const AnyParams& IndexParams) { } unique_ptr progress_bar(printProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); - for (size_t id = 0; id < data_.size(); ++id) { + for (size_t id = 0; id < this->data_.size(); ++id) { for (size_t hashId = 0; hashId < num_hash_; ++hashId) { - size_t val = computeHashValue(hashId, data_[id], NULL); // Already <= hash_table_size_; + size_t val = computeHashValue(hashId, this->data_[id], NULL); // Already <= hash_table_size_; //cout << val << endl; if (!hash_tables_[hashId][val]) { hash_tables_[hashId][val] = new vector(); @@ -152,7 +152,7 @@ void PermutationIndexLSHBin::GenSearch(QueryType* query) const { * times. At the same time, other relevant entries will be removed! */ if (!found.count(id)) { - query->CheckAndAddToResult(data_[id]); + query->CheckAndAddToResult(this->data_[id]); found.insert(id); } } diff --git a/similarity_search/src/method/permutation_inverted_index.cc b/similarity_search/src/method/permutation_inverted_index.cc index 4439694..243dda8 100644 --- a/similarity_search/src/method/permutation_inverted_index.cc +++ b/similarity_search/src/method/permutation_inverted_index.cc @@ -67,7 +67,7 @@ template PermutationInvertedIndex::PermutationInvertedIndex( bool PrintProgress, const Space& space, - const ObjectVector& data) : space_(space), data_(data), PrintProgress_(PrintProgress) { + const ObjectVector& data) : Index(data), space_(space), PrintProgress_(PrintProgress) { } @@ -97,18 +97,18 @@ void PermutationInvertedIndex::CreateIndex(const AnyParams& IndexParams) LOG(LIB_INFO) << "# knnAmp = " << knn_amp_; unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr): + new ProgressDisplay(this->data_.size(), cerr): NULL); - GetPermutationPivot(data_, space_, num_pivot_, &pivot_); + GetPermutationPivot(this->data_, space_, num_pivot_, &pivot_); posting_lists_.resize(num_pivot_); - for (size_t id = 0; id < data_.size(); ++id) { + for (size_t id = 0; id < this->data_.size(); ++id) { Permutation perm; - GetPermutation(pivot_, space_, data_[id], &perm); + GetPermutation(pivot_, space_, this->data_[id], &perm); for (size_t j = 0; j < perm.size(); ++j) { if (perm[j] < num_pivot_index_) { posting_lists_[j].push_back(ObjectInvEntry(id, perm[j])); @@ -172,7 +172,7 @@ void PermutationInvertedIndex::GenSearch(QueryType* query, size_t K) con } } - bool bUseMap = maxScanQty < USE_MAP_THRESHOLD * data_.size(); // TODO: @leo this is rather adhoc + bool bUseMap = maxScanQty < USE_MAP_THRESHOLD * this->data_.size(); // TODO: @leo this is rather adhoc vector perm_dists; @@ -218,9 +218,9 @@ void PermutationInvertedIndex::GenSearch(QueryType* query, size_t K) con } else { int MaxDist = num_pivot_search_ * num_pivot_index_; - perm_dists.reserve(data_.size()); + perm_dists.reserve(this->data_.size()); - for (size_t i = 0; i < data_.size(); ++i) + for (size_t i = 0; i < this->data_.size(); ++i) perm_dists.push_back(make_pair(MaxDist, i)); for (size_t i = 0, inum = 0; i < perm_q.size(); ++i) { @@ -255,7 +255,7 @@ void PermutationInvertedIndex::GenSearch(QueryType* query, size_t K) con for (size_t i = 0; i < scan_qty; ++i) { const size_t idx = quick_select.GetNext().second; quick_select.Next(); - query->CheckAndAddToResult(data_[idx]); + query->CheckAndAddToResult(this->data_[idx]); } } diff --git a/similarity_search/src/method/permutation_prefix_index.cc b/similarity_search/src/method/permutation_prefix_index.cc index 4b295bc..8f7f27e 100644 --- a/similarity_search/src/method/permutation_prefix_index.cc +++ b/similarity_search/src/method/permutation_prefix_index.cc @@ -218,7 +218,7 @@ template PermutationPrefixIndex::PermutationPrefixIndex( bool PrintProgress, const Space& space, - const ObjectVector& data) : space_(space), data_(data), PrintProgress_(PrintProgress) { + const ObjectVector& data) : Index(data), space_(space), PrintProgress_(PrintProgress) { } template @@ -236,16 +236,16 @@ void PermutationPrefixIndex::CreateIndex(const AnyParams& IndexParams) { LOG(LIB_INFO) << "prefix length = " << prefix_length_; LOG(LIB_INFO) << "ChunkBucket = " << chunkBucket_; - GetPermutationPivot(data_, space_, num_pivot_, &pivot_); + GetPermutationPivot(this->data_, space_, num_pivot_, &pivot_); prefixtree_.reset(new PrefixTree); Permutation permutation; unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); - for (const auto& it : data_) { + for (const auto& it : this->data_) { permutation.clear(); GetPermutationPPIndex(pivot_, space_, it, &permutation); prefixtree_->Insert(permutation, it, prefix_length_); diff --git a/similarity_search/src/method/pivot_neighb_invindx.cc b/similarity_search/src/method/pivot_neighb_invindx.cc index c2b3eba..84ddd9f 100644 --- a/similarity_search/src/method/pivot_neighb_invindx.cc +++ b/similarity_search/src/method/pivot_neighb_invindx.cc @@ -83,7 +83,7 @@ PivotNeighbInvertedIndex::PivotNeighbInvertedIndex( bool PrintProgress, const Space& space, const ObjectVector& data) - : data_(data), + : Index(data), space_(space), PrintProgress_(PrintProgress), recreate_points_(false), @@ -121,7 +121,7 @@ void PivotNeighbInvertedIndex::CreateIndex(const AnyParams& IndexParams) pmgr.GetParamOptional("pivotFile", pivot_file_, ""); - size_t indexQty = (data_.size() + chunk_index_size_ - 1) / chunk_index_size_; + size_t indexQty = (this->data_.size() + chunk_index_size_ - 1) / chunk_index_size_; pmgr.CheckUnused(); this->ResetQueryTimeParams(); @@ -136,7 +136,7 @@ void PivotNeighbInvertedIndex::CreateIndex(const AnyParams& IndexParams) LOG(LIB_INFO) << "Do we recreate points during indexing when computing distances to pivots? = " << recreate_points_; if (pivot_file_.empty()) - GetPermutationPivot(data_, space_, num_pivot_, &pivot_, &pivot_pos_); + GetPermutationPivot(this->data_, space_, num_pivot_, &pivot_, &pivot_pos_); else { vector vExternIds; space_.ReadDataset(pivot_, vExternIds, pivot_file_, num_pivot_); @@ -165,7 +165,7 @@ void PivotNeighbInvertedIndex::CreateIndex(const AnyParams& IndexParams) if (index_thread_qty_ <= 1) { unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); for (size_t chunkId = 0; chunkId < indexQty; ++chunkId) { IndexChunk(chunkId, progress_bar.get(), progressBarMutex); @@ -181,7 +181,7 @@ void PivotNeighbInvertedIndex::CreateIndex(const AnyParams& IndexParams) LOG(LIB_INFO) << "Will create " << index_thread_qty_ << " indexing threads";; unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); for (size_t i = 0; i < index_thread_qty_; ++i) { @@ -256,7 +256,7 @@ template void PivotNeighbInvertedIndex::IndexChunk(size_t chunkId, ProgressDisplay* progress_bar, mutex& display_mutex) { size_t minId = chunkId * chunk_index_size_; - size_t maxId = min(data_.size(), minId + chunk_index_size_); + size_t maxId = min(this->data_.size(), minId + chunk_index_size_); auto & chunkPostLists = *posting_lists_[chunkId]; @@ -265,7 +265,7 @@ PivotNeighbInvertedIndex::IndexChunk(size_t chunkId, ProgressDisplay* pr for (size_t id = 0; id < maxId - minId; ++id) { Permutation perm; - const Object* pObj = data_[minId + id]; + const Object* pObj = this->data_[minId + id]; unique_ptr extObj; if (recreate_points_) { @@ -447,9 +447,9 @@ void PivotNeighbInvertedIndex::LoadIndex(const string &location) { " from the header (location " + location + ")"); pivot_.resize(num_pivot_); for (size_t i = 0; i < pivot_pos_.size(); ++i) { - CHECK_MSG(pivot_pos_[i] < data_.size(), + CHECK_MSG(pivot_pos_[i] < this->data_.size(), DATA_MUTATION_ERROR_MSG + " (detected an object index >= #of data points"); - pivot_[i] = data_[pivot_pos_[i]]; + pivot_[i] = this->data_[pivot_pos_[i]]; } ++lineNum; // Read pivot object IDs @@ -542,10 +542,10 @@ void PivotNeighbInvertedIndex::GenSearch(QueryType* query, size_t K) con for (size_t chunkId = 0; chunkId < posting_lists_.size(); ++chunkId) { const auto & chunkPostLists = *posting_lists_[chunkId]; size_t minId = chunkId * chunk_index_size_; - size_t maxId = min(data_.size(), minId + chunk_index_size_); + size_t maxId = min(this->data_.size(), minId + chunk_index_size_); size_t chunkQty = maxId - minId; - const auto data_start = &data_[0] + minId; + const auto data_start = &this->data_[0] + minId; if (use_sort_) { if (!db_scan) { diff --git a/similarity_search/src/method/proj_vptree.cc b/similarity_search/src/method/proj_vptree.cc index 4c77985..5f14ef9 100644 --- a/similarity_search/src/method/proj_vptree.cc +++ b/similarity_search/src/method/proj_vptree.cc @@ -70,8 +70,8 @@ ProjectionVPTree::ProjectionVPTree( bool PrintProgress, Space& space, const ObjectVector& data) : + Index(data), space_(space), - data_(data), PrintProgress_(PrintProgress), K_(0), knn_amp_(0), @@ -126,7 +126,7 @@ void ProjectionVPTree::CreateIndex(const AnyParams& IndexParams) { projObj_.reset(Projection::createProjection( space_, - data_, + this->data_, projType, intermDim, projDim_, @@ -163,10 +163,10 @@ void ProjectionVPTree::CreateIndex(const AnyParams& IndexParams) { - projData_.resize(data_.size()); + projData_.resize(this->data_.size()); - for (size_t id = 0; id < data_.size(); ++id) { - projData_[id] = ProjectOneVect(id, NULL, data_[id]); + for (size_t id = 0; id < this->data_.size(); ++id) { + projData_[id] = ProjectOneVect(id, NULL, this->data_[id]); } ReportIntrinsicDimensionality("Set of projections" , *VPTreeSpace_, projData_); @@ -183,7 +183,7 @@ void ProjectionVPTree::CreateIndex(const AnyParams& IndexParams) { template ProjectionVPTree::~ProjectionVPTree() { - for (size_t i = 0; i < data_.size(); ++i) { + for (size_t i = 0; i < this->data_.size(); ++i) { delete projData_[i]; } } @@ -217,7 +217,7 @@ void ProjectionVPTree::Search(RangeQuery* query, IdType) const { while (!ResQueue->Empty()) { size_t id = reinterpret_cast(ResQueue->TopObject())->id(); - query->CheckAndAddToResult(data_[id]); + query->CheckAndAddToResult(this->data_[id]); ResQueue->Pop(); } } @@ -240,7 +240,7 @@ void ProjectionVPTree::Search(KNNQuery* query, IdType) const { while (!ResQueue->Empty()) { size_t id = reinterpret_cast(ResQueue->TopObject())->id(); - query->CheckAndAddToResult(data_[id]); + query->CheckAndAddToResult(this->data_[id]); ResQueue->Pop(); } } diff --git a/similarity_search/src/method/projection_index_incremental.cc b/similarity_search/src/method/projection_index_incremental.cc index 72f4cda..c77cf8e 100644 --- a/similarity_search/src/method/projection_index_incremental.cc +++ b/similarity_search/src/method/projection_index_incremental.cc @@ -44,7 +44,7 @@ ProjectionIndexIncremental::ProjectionIndexIncremental( bool PrintProgress, const Space& space, const ObjectVector& data) - : space_(space), data_(data), PrintProgress_(PrintProgress), + : Index(data), space_(space), PrintProgress_(PrintProgress), K_(0) { } @@ -83,7 +83,7 @@ void ProjectionIndexIncremental::CreateIndex(const AnyParams& Index proj_obj_.reset(Projection::createProjection( space_, - data_, + this->data_, proj_descr_, intermDim, proj_dim_, @@ -97,15 +97,15 @@ void ProjectionIndexIncremental::CreateIndex(const AnyParams& Index unique_ptr(new AnyParams(projSpaceDesc)); unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr) + new ProgressDisplay(this->data_.size(), cerr) :NULL); #ifdef PROJ_CONTIGUOUS_STORAGE - proj_vects_.resize(data_.size() * proj_dim_); + proj_vects_.resize(this->data_.size() * proj_dim_); vector TmpVect(proj_dim_); - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += proj_dim_) { - proj_obj_->compProj(NULL, data_[i], &TmpVect[0]); + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += proj_dim_) { + proj_obj_->compProj(NULL, this->data_[i], &TmpVect[0]); memcpy(&proj_vects_[start], &TmpVect[0], sizeof(proj_vects_[0])*proj_dim_); if (progress_bar) ++(*progress_bar); } @@ -175,10 +175,10 @@ void ProjectionIndexIncremental::GenSearch(QueryType* query, size_t K) c if (!use_priority_queue_) { std::vector proj_dists; - proj_dists.reserve(data_.size()); + proj_dists.reserve(this->data_.size()); #ifdef PROJ_CONTIGUOUS_STORAGE - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += proj_dim_) { + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += proj_dim_) { float projDist = use_cosine_ ? CosineSimilarity(&proj_vects_[start], &QueryVect[0], proj_dim_) : @@ -202,13 +202,13 @@ void ProjectionIndexIncremental::GenSearch(QueryType* query, size_t K) c for (size_t i = 0; i < scan_qty; ++i) { const size_t idx = quick_select.GetNext().second; quick_select.Next(); - query->CheckAndAddToResult(data_[idx]); + query->CheckAndAddToResult(this->data_[idx]); } } else { priority_queue filterQueue; #ifdef PROJ_CONTIGUOUS_STORAGE - for (size_t i = 0, start = 0; i < data_.size(); ++i, start += proj_dim_) { + for (size_t i = 0, start = 0; i < this->data_.size(); ++i, start += proj_dim_) { float projDist = L2NormSIMD(&proj_vects_[start], &QueryVect[0], proj_dim_); #else for (size_t i = 0; i < proj_vects_.size(); ++i) { @@ -223,7 +223,7 @@ void ProjectionIndexIncremental::GenSearch(QueryType* query, size_t K) c while (filterQueue.size() > db_scan) filterQueue.pop(); while (!filterQueue.empty()) { const size_t idx = filterQueue.top().second; - query->CheckAndAddToResult(data_[idx]); + query->CheckAndAddToResult(this->data_[idx]); filterQueue.pop(); } } diff --git a/similarity_search/src/method/seqsearch.cc b/similarity_search/src/method/seqsearch.cc index 09445db..c12f70b 100644 --- a/similarity_search/src/method/seqsearch.cc +++ b/similarity_search/src/method/seqsearch.cc @@ -55,7 +55,7 @@ struct SearchThreadSeqSearch { template SeqSearch::SeqSearch(Space& space, const ObjectVector& origData) : - space_(space), origData_(origData), cacheOptimizedBucket_(NULL), pData_(NULL) { + Index(origData), space_(space), cacheOptimizedBucket_(NULL), pData_(NULL) { } template @@ -96,7 +96,7 @@ void SeqSearch::CreateIndex(const AnyParams& IndexParams) { SetQueryTimeParams(getEmptyParams()); if (bCopyMem) { - CreateCacheOptimizedBucket(origData_, cacheOptimizedBucket_, pData_); + CreateCacheOptimizedBucket(this->data_, cacheOptimizedBucket_, pData_); } } diff --git a/similarity_search/src/method/simple_inverted_index.cc b/similarity_search/src/method/simple_inverted_index.cc index ad5f1d3..3f05bbf 100644 --- a/similarity_search/src/method/simple_inverted_index.cc +++ b/similarity_search/src/method/simple_inverted_index.cc @@ -135,7 +135,7 @@ void SimplInvIndex::Search(KNNQuery* query, IdType) const { query->CheckAndAddToResult(-tmpResQueue.top_key(), data_[tmpResQueue.top_data()]); #else // This branch recomputes the distance, but it normally has a negligibly small effect on the run-time - query->CheckAndAddToResult(data_[tmpResQueue.top_data()]); + query->CheckAndAddToResult(this->data_[tmpResQueue.top_data()]); #endif tmpResQueue.pop(); } @@ -159,9 +159,9 @@ void SimplInvIndex::CreateIndex(AnyParamManager& ParamManager) { LOG(LIB_INFO) << "Collecting dictionary stat"; { - ProgressDisplay pbar(data_.size(), cerr); + ProgressDisplay pbar(this->data_.size(), cerr); - for (const Object* o : data_) { + for (const Object* o : this->data_) { tmp_vect.clear(); UnpackSparseElements(o->data(), o->datalength(), tmp_vect); for (const auto& e : tmp_vect) dict_qty[e.id_] ++; @@ -182,12 +182,12 @@ void SimplInvIndex::CreateIndex(AnyParamManager& ParamManager) { } { - ProgressDisplay pbar(data_.size(), cerr); + ProgressDisplay pbar(this->data_.size(), cerr); // Fill posting lists - for (size_t did = 0; did < data_.size(); ++did) { + for (size_t did = 0; did < this->data_.size(); ++did) { tmp_vect.clear(); - UnpackSparseElements(data_[did]->data(), data_[did]->datalength(), tmp_vect); + UnpackSparseElements(this->data_[did]->data(), this->data_[did]->datalength(), tmp_vect); // iterate over all terms in the document (non-zero values in the sparse vector) for (const auto& e : tmp_vect) { const auto wordId = e.id_; diff --git a/similarity_search/src/method/small_world_rand.cc b/similarity_search/src/method/small_world_rand.cc index dfb0ece..950c47c 100644 --- a/similarity_search/src/method/small_world_rand.cc +++ b/similarity_search/src/method/small_world_rand.cc @@ -111,7 +111,7 @@ template SmallWorldRand::SmallWorldRand(bool PrintProgress, const Space& space, const ObjectVector& data) : - space_(space), data_(data), PrintProgress_(PrintProgress), use_proxy_dist_(false) {} + Index(data), space_(space), PrintProgress_(PrintProgress), use_proxy_dist_(false) {} template void SmallWorldRand::UpdateNextNodeId(size_t newNextNodeId) @@ -354,7 +354,7 @@ void SmallWorldRand::CreateIndex(const AnyParams& IndexParams) SetQueryTimeParams(getEmptyParams()); - AddBatch(data_, PrintProgress_); + AddBatch(this->data_, PrintProgress_); changedAfterCreateIndex_ = false; } @@ -766,17 +766,17 @@ void SmallWorldRand::SaveIndex(const string &location) { for(ElementMap::iterator it = ElList_.begin(); it != ElList_.end(); ++it) { MSWNode* pNode = it->second; IdType nodeID = pNode->getId(); - CHECK_MSG(nodeID >= 0 && nodeID < (ssize_t)data_.size(), + CHECK_MSG(nodeID >= 0 && nodeID < (ssize_t)this->data_.size(), "Bug: unexpected node ID " + ConvertToString(nodeID) + " for object ID " + ConvertToString(pNode->getData()->id()) + - "data_.size() = " + ConvertToString(data_.size())); + "data_.size() = " + ConvertToString(this->data_.size())); outFile << nodeID << ":" << pNode->getData()->id() << ":"; for (const MSWNode* pNodeFriend: pNode->getAllFriends()) { IdType nodeFriendID = pNodeFriend->getId(); - CHECK_MSG(nodeFriendID >= 0 && nodeFriendID < (ssize_t)data_.size(), + CHECK_MSG(nodeFriendID >= 0 && nodeFriendID < (ssize_t)this->data_.size(), "Bug: unexpected node ID " + ConvertToString(nodeFriendID) + " for object ID " + ConvertToString(pNodeFriend->getData()->id()) + - "data_.size() = " + ConvertToString(data_.size())); + "data_.size() = " + ConvertToString(this->data_.size())); outFile << ' ' << nodeFriendID; } outFile << endl; lineNum++; @@ -788,7 +788,7 @@ void SmallWorldRand::SaveIndex(const string &location) { template void SmallWorldRand::LoadIndex(const string &location) { - vector ptrMapper(data_.size()); + vector ptrMapper(this->data_.size()); for (unsigned pass = 0; pass < 2; ++ pass) { ifstream inFile(location); @@ -820,17 +820,17 @@ void SmallWorldRand::LoadIndex(const string &location) { string("Bug or inconsitent data, wrong format, c1=") + c1 + ",c2=" + c2 + " line: " + ConvertToString(lineNum) ); - CHECK_MSG(nodeID >= 0 && nodeID < (ssize_t)data_.size(), + CHECK_MSG(nodeID >= 0 && nodeID < (ssize_t)this->data_.size(), DATA_MUTATION_ERROR_MSG + " (unexpected node ID " + ConvertToString(nodeID) + " for object ID " + ConvertToString(objID) + - " data_.size() = " + ConvertToString(data_.size()) + ")"); - CHECK_MSG(data_[nodeID]->id() == objID, - DATA_MUTATION_ERROR_MSG + " (unexpected object ID " + ConvertToString(data_[nodeID]->id()) + + " data_.size() = " + ConvertToString(this->data_.size()) + ")"); + CHECK_MSG(this->data_[nodeID]->id() == objID, + DATA_MUTATION_ERROR_MSG + " (unexpected object ID " + ConvertToString(this->data_[nodeID]->id()) + " for data element with ID " + ConvertToString(nodeID) + " expected object ID: " + ConvertToString(objID) + ")" ); if (pass == 0) { - unique_ptr node(new MSWNode(data_[nodeID], nodeID)); + unique_ptr node(new MSWNode(this->data_[nodeID], nodeID)); ptrMapper[nodeID] = node.get(); IdType dataId = node->getData()->id(); ElList_.insert(make_pair(dataId, node.release())); @@ -840,9 +840,9 @@ void SmallWorldRand::LoadIndex(const string &location) { "Bug, got NULL pointer in the second pass for nodeID " + ConvertToString(nodeID)); IdType nodeFriendID; while (str >> nodeFriendID) { - CHECK_MSG(nodeFriendID >= 0 && nodeFriendID < (ssize_t)data_.size(), + CHECK_MSG(nodeFriendID >= 0 && nodeFriendID < (ssize_t)this->data_.size(), "Bug: unexpected node ID " + ConvertToString(nodeFriendID) + - "data_.size() = " + ConvertToString(data_.size())); + "data_.size() = " + ConvertToString(this->data_.size())); MSWNode *pFriendNode = ptrMapper[nodeFriendID]; CHECK_MSG(pFriendNode != NULL, "Bug, got NULL pointer in the second pass for nodeID " + ConvertToString(nodeFriendID)); diff --git a/similarity_search/src/method/spatial_approx_tree.cc b/similarity_search/src/method/spatial_approx_tree.cc index 6a2535f..93428fd 100644 --- a/similarity_search/src/method/spatial_approx_tree.cc +++ b/similarity_search/src/method/spatial_approx_tree.cc @@ -159,19 +159,19 @@ void SpatialApproxTree::SATNode::Search ( template SpatialApproxTree::SpatialApproxTree( const Space& space, - const ObjectVector& data) : space_(space), data_(data) { + const ObjectVector& data) : Index(data), space_(space) { } template void SpatialApproxTree::CreateIndex(const AnyParams& ) { - size_t index = RandomInt() % data_.size(); - const Object* pivot = data_[index]; + size_t index = RandomInt() % this->data_.size(); + const Object* pivot = this->data_[index]; DistObjectPairVector dp; - for (size_t i = 0; i < data_.size(); ++i) { + for (size_t i = 0; i < this->data_.size(); ++i) { if (i != index) { dp.push_back( - make_pair(space_.IndexTimeDistance(data_[i], pivot), data_[i])); + make_pair(space_.IndexTimeDistance(this->data_[i], pivot), this->data_[i])); } } diff --git a/similarity_search/src/method/vptree.cc b/similarity_search/src/method/vptree.cc index bb6d54b..e1e9844 100644 --- a/similarity_search/src/method/vptree.cc +++ b/similarity_search/src/method/vptree.cc @@ -48,9 +48,9 @@ VPTree::VPTree( bool PrintProgress, Space& space, const ObjectVector& data, - bool use_random_center) : + bool use_random_center) : + Index(data), space_(space), - data_(data), PrintProgress_(PrintProgress), use_random_center_(use_random_center), max_pivot_select_attempts_(MAX_PIVOT_SELECT_ATTEMPTS), @@ -82,13 +82,13 @@ void VPTree::CreateIndex(const AnyParams& IndexParams) { this->ResetQueryTimeParams(); // reset query-time parameters unique_ptr progress_bar(PrintProgress_ ? - new ProgressDisplay(data_.size(), cerr): + new ProgressDisplay(this->data_.size(), cerr): NULL); root_.reset(new VPNode(0, progress_bar.get(), oracle_, - space_, data_, + space_, this->data_, max_pivot_select_attempts_, BucketSize_, ChunkBucket_, use_random_center_ /* use random center */));