diff --git a/.travis.yml b/.travis.yml
index ec490b6..d75cf7d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,10 @@ before_install:
   - |
     if [ "$TRAVIS_OS_NAME" = "linux" ]; then export CXX=g++-4.8 CC=gcc-4.8; fi
     if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "${PYTHON:0:1}" = "3" ]; then
-      brew update; brew install python3; rvm get head || true
+      brew update
+      brew install python3
+      command curl -sSL https://rvm.io/mpapis.asc | gpg --import -;
+      rvm get stable
     fi
     pip install --user --upgrade pip virtualenv
     virtualenv -p python$PYTHON venv
diff --git a/similarity_search/include/method/falconn.h b/similarity_search/include/method/falconn.h
index d600e98..0b90efd 100644
--- a/similarity_search/include/method/falconn.h
+++ b/similarity_search/include/method/falconn.h
@@ -84,7 +84,6 @@ class FALCONN : public Index<dist_t> {
   // createDenseDataPoint assumes that p was initialized using dim_ as the number of elements.
   void createDenseDataPoint(const Object* o, DenseFalconnPoint& p, bool normData) const;
 
-  const ObjectVector& data_;
   Space<dist_t>& space_;
   bool sparse_;
   size_t dim_; // only for dense vector spaces
diff --git a/similarity_search/include/method/lsh.h b/similarity_search/include/method/lsh.h
index 34d74ba..feb7254 100644
--- a/similarity_search/include/method/lsh.h
+++ b/similarity_search/include/method/lsh.h
@@ -143,7 +143,6 @@ class LSH : public Index<dist_t> {
  private:
   typedef lshkit::LshIndex<lsh_t, unsigned> LshIndexType;
 
-  const ObjectVector& data_;
   int p_;
   lshkit::FloatMatrix* matrix_;
   LshIndexType* index_;
diff --git a/similarity_search/include/method/lsh_multiprobe.h b/similarity_search/include/method/lsh_multiprobe.h
index 8531542..edbe0ae 100644
--- a/similarity_search/include/method/lsh_multiprobe.h
+++ b/similarity_search/include/method/lsh_multiprobe.h
@@ -52,7 +52,6 @@ class MultiProbeLSH : public Index<dist_t> {
  private:
   typedef lshkit::MultiProbeLshIndex<unsigned> LshIndexType;
 
-  const ObjectVector& data_;
   int dim_;
   lshkit::FloatMatrix* matrix_;
   LshIndexType* index_;
diff --git a/similarity_search/include/method/nndes.h b/similarity_search/include/method/nndes.h
index 385c09b..8dedbba 100644
--- a/similarity_search/include/method/nndes.h
+++ b/similarity_search/include/method/nndes.h
@@ -86,7 +86,6 @@ class NNDescentMethod : public Index<dist_t> {
 
   const Space<dist_t>& space_;
-  const ObjectVector& data_;
   bool PrintProgress_;
 
   size_t NN_; // K in the original Wei Dong's code nndes.cpp
diff --git a/similarity_search/src/method/falconn.cc b/similarity_search/src/method/falconn.cc
index 5639a3d..50882e0 100644
--- a/similarity_search/src/method/falconn.cc
+++ b/similarity_search/src/method/falconn.cc
@@ -107,7 +107,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
   SpaceSparseVectorInter<dist_t>* pSparseSpace = dynamic_cast<SpaceSparseVectorInter<dist_t>*>(&space_);
   VectorSpace<dist_t>* pDenseSpace = dynamic_cast<VectorSpace<dist_t>*>(&space_);
 
-  if (data_.empty()) return;
+  if (this->data_.empty()) return;
   if (pSparseSpace == nullptr && pDenseSpace == nullptr) {
     throw runtime_error("Only dense vector spaces and FAST sparse vector spaces are supported!");
   }
@@ -116,7 +116,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
     sparse_ = true;
     SparseFalconnPoint p;
     dim_ = 0;
-    for (const Object* o: data_) {
+    for (const Object* o: this->data_) {
       createSparseDataPoint(o, p, normData);
       falconn_data_sparse_.push_back(p);
       if (p.size())
@@ -137,9 +137,9 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
   }
   if (pDenseSpace != nullptr) {
     LOG(LIB_INFO) << "Copying a dense vector data set.";
-    dim_ = data_[0]->datalength() / sizeof(dist_t);
+    dim_ = this->data_[0]->datalength() / sizeof(dist_t);
     DenseFalconnPoint p(dim_);
-    for (const Object* o: data_) {
+    for (const Object* o: this->data_) {
       createDenseDataPoint(o, p, normData);
       falconn_data_dense_.emplace_back(p);
     }
@@ -171,7 +171,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
 
 template <typename dist_t>
 FALCONN<dist_t>::FALCONN(Space<dist_t>& space,
-                         const ObjectVector& data) : data_(data), space_(space), sparse_(false),
+                         const ObjectVector& data) : Index<dist_t>(data), space_(space), sparse_(false),
                                                      dim_(0), num_probes_(10) {
 }
@@ -242,7 +242,7 @@ void FALCONN<dist_t>::CreateIndex(const AnyParams& IndexParams) {
    * The current formula mimics the Glove setup in ann_benchmarks,
    * where a roughly 2^20 entry data sets uses 16-bit tables.
    */
-  size_t num_hash_bits = max(2, static_cast<int>(floor(log2(data_.size()>>4))));
+  size_t num_hash_bits = max(2, static_cast<int>(floor(log2(this->data_.size()>>4))));
   pmgr.GetParamOptional(PARAM_NUM_HASH_BITS, num_hash_bits, num_hash_bits);
 
   /*
@@ -312,17 +312,17 @@ void FALCONN<dist_t>::Search(KNNQuery<dist_t>* query, IdType) const {
     }
     // Recomputing distances for k nearest neighbors should have a very small impact on overall performance
     for (IdType ii : ids) {
-      query->CheckAndAddToResult(data_[ii]);
+      query->CheckAndAddToResult(this->data_[ii]);
     }
   } else {
     if (sparse_) {
       SparseFalconnPoint sparseQ;
       createSparseDataPoint(query->QueryObject(), sparseQ, norm_data_);
-      falconn_table_sparse_->find_k_nearest_neighbors(sparseQ, center_point_.get(), query, &data_, query->GetK(), &ids);
+      falconn_table_sparse_->find_k_nearest_neighbors(sparseQ, center_point_.get(), query, &this->data_, query->GetK(), &ids);
     } else {
       DenseFalconnPoint denseQ(dim_);
       createDenseDataPoint(query->QueryObject(), denseQ, norm_data_);
-      falconn_table_dense_->find_k_nearest_neighbors(denseQ, center_point_.get(), query, &data_, query->GetK(), &ids);
+      falconn_table_dense_->find_k_nearest_neighbors(denseQ, center_point_.get(), query, &this->data_, query->GetK(), &ids);
     }
   }
 }
diff --git a/similarity_search/src/method/lsh.cc b/similarity_search/src/method/lsh.cc
index 8b0e2d4..0f657f5 100644
--- a/similarity_search/src/method/lsh.cc
+++ b/similarity_search/src/method/lsh.cc
@@ -27,7 +27,7 @@ namespace similarity {
 
 template <typename dist_t, typename lsh_t, typename paramcreator_t>
 LSH<dist_t, lsh_t, paramcreator_t>::LSH(const Space<dist_t>& space,
                                         const ObjectVector& data,
-                                        int P) : data_(data), p_(P) {
+                                        int P) : Index<dist_t>(data), p_(P) {
 }
@@ -43,15 +43,15 @@ void LSH<dist_t, lsh_t, paramcreator_t>::CreateIndex(const AnyParams& IndexParam
   pmgr.GetParamOptional("W", LSH_W, 20);
   pmgr.GetParamOptional("M", LSH_M, 20);
   pmgr.GetParamOptional("L", LSH_L, 50);
-  pmgr.GetParamOptional("H", LSH_H, data_.size() + 1);
+  pmgr.GetParamOptional("H", LSH_H, this->data_.size() + 1);
 
   int is_float = std::is_same<float,dist_t>::value;
   CHECK_MSG(is_float, "LSH works only for single-precision numbers");
   CHECK_MSG(sizeof(dist_t) == sizeof(float), "LSH works only for single-precision numbers");
-  CHECK_MSG(!data_.empty(), "The data set shouldn't be empty");
+  CHECK_MSG(!this->data_.empty(), "The data set shouldn't be empty");
   CHECK_MSG(p_ == 1 || p_ == 2, "The value of the space selector should be either 1 or 2!");
 
-  const size_t datalength = data_[0]->datalength();
+  const size_t datalength = this->data_[0]->datalength();
 
   LOG(LIB_INFO) << "W (window size (used only for LSHCauchy and LSHGaussian)) : " << LSH_W;
   LOG(LIB_INFO) << "M (# of hash functions) : " << LSH_M;
@@ -59,11 +59,11 @@ void LSH<dist_t, lsh_t, paramcreator_t>::CreateIndex(const AnyParams& IndexParam
   LOG(LIB_INFO) << "H (# hash table size) : " << LSH_H;
 
   const int dim = static_cast<int>(datalength / sizeof(float));
-  matrix_ = new lshkit::FloatMatrix(dim, static_cast<int>(data_.size()));
+  matrix_ = new lshkit::FloatMatrix(dim, static_cast<int>(this->data_.size()));
 
-  for (size_t i = 0; i < data_.size(); ++i) {
-    CHECK(datalength == data_[i]->datalength());
-    const float* x = reinterpret_cast<const float*>(data_[i]->data());
+  for (size_t i = 0; i < this->data_.size(); ++i) {
+    CHECK(datalength == this->data_[i]->datalength());
+    const float* x = reinterpret_cast<const float*>(this->data_[i]->data());
     for (int j = 0; j < dim; ++j) {
       (*matrix_)[i][j] = x[j];
     }
@@ -115,7 +115,7 @@ void LSH<dist_t, lsh_t, paramcreator_t>::Search(KNNQuery<dist_t>* query, IdType) const
   lshkit::Topk<unsigned>& knn = query_scanner.topk();
   for (size_t i = 0; i < knn.size(); ++i) {
     if (knn[i].key != std::numeric_limits<unsigned>::max()) {
-      query->CheckAndAddToResult(knn[i].dist, data_[knn[i].key]);
+      query->CheckAndAddToResult(knn[i].dist, this->data_[knn[i].key]);
     }
   }
 }
diff --git a/similarity_search/src/method/lsh_multiprobe.cc b/similarity_search/src/method/lsh_multiprobe.cc
index 711b54d..8ef468b 100644
--- a/similarity_search/src/method/lsh_multiprobe.cc
+++ b/similarity_search/src/method/lsh_multiprobe.cc
@@ -26,7 +26,7 @@ namespace similarity {
 
 template <typename dist_t>
 MultiProbeLSH<dist_t>::MultiProbeLSH(const Space<dist_t>& space,
-                                     const ObjectVector& data) : data_(data) {
+                                     const ObjectVector& data) : Index<dist_t>(data) {
 }
 
 template <typename dist_t>
@@ -43,7 +43,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {
 
   pmgr.GetParamOptional("M", LSH_M, 20);
   pmgr.GetParamOptional("L", LSH_L, 50);
-  pmgr.GetParamOptional("H", LSH_H, data_.size() + 1);
+  pmgr.GetParamOptional("H", LSH_H, this->data_.size() + 1);
   pmgr.GetParamOptional("W", LSH_W, 20);
   pmgr.GetParamOptional("T", LSH_T, 10);
   pmgr.GetParamOptional("tuneK", LSH_TuneK, 1);
@@ -52,7 +52,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {
 
   // For FitData():
   // number of points to use
-  unsigned N1 = data_.size();
+  unsigned N1 = this->data_.size();
   // number of pairs to sample
   unsigned P = 10000;
   // number of queries to sample
@@ -82,7 +82,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {
   unsigned F = 10;
   // For MPLSHTune():
   // dataset size
-  unsigned N2 = data_.size();
+  unsigned N2 = this->data_.size();
   // desired recall
 
   CreateIndexInternal(
@@ -113,13 +113,13 @@ void MultiProbeLSH<dist_t>::CreateIndexInternal(
   int is_float = std::is_same<float,dist_t>::value;
   CHECK(is_float);
   CHECK(sizeof(dist_t) == sizeof(float));
-  CHECK(!data_.empty());
-  const size_t datalength = data_[0]->datalength();
+  CHECK(!this->data_.empty());
+  const size_t datalength = this->data_[0]->datalength();
   dim_ = static_cast<int>(datalength / sizeof(float));
-  matrix_ = new lshkit::FloatMatrix(dim_, static_cast<int>(data_.size()));
-  for (size_t i = 0; i < data_.size(); ++i) {
-    CHECK(datalength == data_[i]->datalength());
-    const float* x = reinterpret_cast<const float*>(data_[i]->data());
+  matrix_ = new lshkit::FloatMatrix(dim_, static_cast<int>(this->data_.size()));
+  for (size_t i = 0; i < this->data_.size(); ++i) {
+    CHECK(datalength == this->data_[i]->datalength());
+    const float* x = reinterpret_cast<const float*>(this->data_[i]->data());
     for (int j = 0; j < dim_; ++j) {
       (*matrix_)[i][j] = x[j];
     }
@@ -188,7 +188,7 @@ void MultiProbeLSH<dist_t>::Search(KNNQuery<dist_t>* query, IdType) const {
   const lshkit::Topk<unsigned>& knn = query_scanner.topk();
   for (size_t i = 0; i < knn.size(); ++i) {
     if (knn[i].key != std::numeric_limits<unsigned>::max()) {
-      query->CheckAndAddToResult(sqrt(knn[i].dist), data_[knn[i].key]);
+      query->CheckAndAddToResult(sqrt(knn[i].dist), this->data_[knn[i].key]);
     }
   }
 }
diff --git a/similarity_search/src/method/nndes.cc b/similarity_search/src/method/nndes.cc
index a8fc7f4..839348b 100644
--- a/similarity_search/src/method/nndes.cc
+++ b/similarity_search/src/method/nndes.cc
@@ -67,7 +67,8 @@ NNDescentMethod<dist_t>::NNDescentMethod(
     bool PrintProgress,
     const Space<dist_t>& space,
     const ObjectVector& data) :
-      space_(space), data_(data), PrintProgress_(PrintProgress),
+      Index<dist_t>(data),
+      space_(space), PrintProgress_(PrintProgress),
       controlQty_(0),
       // default value from Wei Dong's code
       nndesOracle_(space, data) {}
@@ -92,17 +93,17 @@ void NNDescentMethod<dist_t>::CreateIndex(const AnyParams& IndexParams) {
 
   LOG(LIB_INFO) << "Starting NN-Descent...";
 
-  nndesObj_.reset(new NNDescent(data_.size(), // N
+  nndesObj_.reset(new NNDescent(this->data_.size(), // N
                                 NN_,  //K
                                 rho_, //S,
                                 nndesOracle_, GRAPH_BOTH));
-  float total = float(data_.size()) * (data_.size() - 1) / 2;
+  float total = float(this->data_.size()) * (this->data_.size() - 1) / 2;
 
   cout.precision(5);
   cout.setf(ios::fixed);
   for (int it = 0; it < iterationQty_; ++it) {
     int t = nndesObj_->iterate(PrintProgress_);
-    float rate = float(t) / (NN_ * data_.size());
+    float rate = float(t) / (NN_ * this->data_.size());
 
     // TODO @leo computation of recall needs to be re-written, can't use original Wei Dong's code
     /*
@@ -140,7 +141,7 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
   const vector<KNN>& nn = nndesObj_->getNN();
 
 #if USE_BITSET_FOR_SEARCHING
-  vector<bool> visitedBitset(data_.size());
+  vector<bool> visitedBitset(this->data_.size());
 #else
   unordered_set<IdType> visitedNodes;
 #endif
@@ -149,12 +150,12 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
   /**
    * Search of most k-closest elements to the query.
    */
-  IdType randPoint = RandomInt() % data_.size();
+  IdType randPoint = RandomInt() % this->data_.size();
 
   priority_queue<dist_t> closestDistQueue;      //The set of all elements which distance was calculated
   priority_queue<EvaluatedNode> candidateSet;   //the set of elements which we can use to evaluate
 
-  const Object* currObj = data_[randPoint];
+  const Object* currObj = this->data_[randPoint];
   dist_t d = query->DistanceObjLeft(currObj);
   query->CheckAndAddToResult(d, currObj);
 
@@ -195,7 +196,7 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
         visitedNodes.insert(currNew);
 #endif
 
-        currObj = data_[currNew];
+        currObj = this->data_[currNew];
         d = query->DistanceObjLeft(currObj);
         query->CheckAndAddToResult(d, currObj);
         EvaluatedNode evE1(-d, currNew);
@@ -217,10 +218,10 @@ void NNDescentMethod<dist_t>::SearchGreedy(KNNQuery<dist_t>* query) const {
   const vector<KNN>& nn = nndesObj_->getNN();
 
   for (size_t i=0; i < initSearchAttempts_; i++) {
-    IdType curr = RandomInt() % data_.size();
+    IdType curr = RandomInt() % this->data_.size();
 
-    dist_t currDist = query->DistanceObjLeft(data_[curr]);
-    query->CheckAndAddToResult(currDist, data_[curr]);
+    dist_t currDist = query->DistanceObjLeft(this->data_[curr]);
+    query->CheckAndAddToResult(currDist, this->data_[curr]);
 
     IdType currOld;
@@ -231,8 +232,8 @@ void NNDescentMethod<dist_t>::SearchGreedy(KNNQuery<dist_t>* query) const {
       for (const KNNEntry& e: nn[currOld]) {
         IdType currNew = e.key;
         if (currNew != KNNEntry::BAD) {
-          dist_t currDistNew = query->DistanceObjLeft(data_[currNew]);
-          query->CheckAndAddToResult(currDistNew, data_[currNew]);
+          dist_t currDistNew = query->DistanceObjLeft(this->data_[currNew]);
+          query->CheckAndAddToResult(currDistNew, this->data_[currNew]);
           if (currDistNew < currDist) {
             curr = currNew;
             currDist = currDistNew;