Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:searchivarius/nmslib into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivarius committed Dec 8, 2017
2 parents b854b20 + 34981bb commit df4fdbd
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 47 deletions.
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ before_install:
- |
if [ "$TRAVIS_OS_NAME" = "linux" ]; then export CXX=g++-4.8 CC=gcc-4.8; fi
if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "${PYTHON:0:1}" = "3" ]; then
brew update; brew install python3; rvm get head || true
brew update
brew install python3
command curl -sSL https://rvm.io/mpapis.asc | gpg --import -;
rvm get stable
fi
pip install --user --upgrade pip virtualenv
virtualenv -p python$PYTHON venv
Expand Down
1 change: 0 additions & 1 deletion similarity_search/include/method/falconn.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ class FALCONN : public Index<dist_t> {
// createDenseDataPoint assumes that p was initialized using dim_ as the number of elements.
void createDenseDataPoint(const Object* o, DenseFalconnPoint& p, bool normData) const;

const ObjectVector& data_;
Space<dist_t>& space_;
bool sparse_;
size_t dim_; // only for dense vector spaces
Expand Down
1 change: 0 additions & 1 deletion similarity_search/include/method/lsh.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ class LSH : public Index<dist_t> {
private:
typedef lshkit::LshIndex<TailRepeatHash<lsh_t>, unsigned> LshIndexType;

const ObjectVector& data_;
int p_;
lshkit::FloatMatrix* matrix_;
LshIndexType* index_;
Expand Down
1 change: 0 additions & 1 deletion similarity_search/include/method/lsh_multiprobe.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class MultiProbeLSH : public Index<dist_t> {
private:
typedef lshkit::MultiProbeLshIndex<unsigned> LshIndexType;

const ObjectVector& data_;
int dim_;
lshkit::FloatMatrix* matrix_;
LshIndexType* index_;
Expand Down
1 change: 0 additions & 1 deletion similarity_search/include/method/nndes.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ class NNDescentMethod : public Index<dist_t> {


const Space<dist_t>& space_;
const ObjectVector& data_;
bool PrintProgress_;

size_t NN_; // K in the original Wei Dong's code nndes.cpp
Expand Down
18 changes: 9 additions & 9 deletions similarity_search/src/method/falconn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
SpaceSparseVectorInter<dist_t>* pSparseSpace = dynamic_cast<SpaceSparseVectorInter<dist_t>*>(&space_);
VectorSpace<dist_t>* pDenseSpace = dynamic_cast<VectorSpace<dist_t>*>(&space_);

if (data_.empty()) return;
if (this->data_.empty()) return;
if (pSparseSpace == nullptr && pDenseSpace == nullptr) {
throw runtime_error("Only dense vector spaces and FAST sparse vector spaces are supported!");
}
Expand All @@ -116,7 +116,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
sparse_ = true;
SparseFalconnPoint p;
dim_ = 0;
for (const Object* o: data_) {
for (const Object* o: this->data_) {
createSparseDataPoint(o, p, normData);
falconn_data_sparse_.push_back(p);
if (p.size())
Expand All @@ -137,9 +137,9 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse
}
if (pDenseSpace != nullptr) {
LOG(LIB_INFO) << "Copying a dense vector data set.";
dim_ = data_[0]->datalength() / sizeof(dist_t);
dim_ = this->data_[0]->datalength() / sizeof(dist_t);
DenseFalconnPoint p(dim_);
for (const Object* o: data_) {
for (const Object* o: this->data_) {
createDenseDataPoint(o, p, normData);
falconn_data_dense_.emplace_back(p);
}
Expand Down Expand Up @@ -171,7 +171,7 @@ void FALCONN<dist_t>::copyData(bool normData, bool centerData, size_t max_sparse

template <typename dist_t>
FALCONN<dist_t>::FALCONN(Space<dist_t>& space,
const ObjectVector& data) : data_(data), space_(space), sparse_(false),
const ObjectVector& data) : Index<dist_t>(data), space_(space), sparse_(false),
dim_(0), num_probes_(10) {
}

Expand Down Expand Up @@ -242,7 +242,7 @@ void FALCONN<dist_t>::CreateIndex(const AnyParams& IndexParams) {
* The current formula mimics the Glove setup in ann_benchmarks,
* where a roughly 2^20 entry data sets uses 16-bit tables.
*/
size_t num_hash_bits = max<size_t>(2, static_cast<size_t>(floor(log2(data_.size()>>4))));
size_t num_hash_bits = max<size_t>(2, static_cast<size_t>(floor(log2(this->data_.size()>>4))));
pmgr.GetParamOptional(PARAM_NUM_HASH_BITS, num_hash_bits, num_hash_bits);

/*
Expand Down Expand Up @@ -312,17 +312,17 @@ void FALCONN<dist_t>::Search(KNNQuery<dist_t>* query, IdType) const {
}
// Recomputing distances for k nearest neighbors should have a very small impact on overall performance
for (IdType ii : ids) {
query->CheckAndAddToResult(data_[ii]);
query->CheckAndAddToResult(this->data_[ii]);
}
} else {
if (sparse_) {
SparseFalconnPoint sparseQ;
createSparseDataPoint(query->QueryObject(), sparseQ, norm_data_);
falconn_table_sparse_->find_k_nearest_neighbors(sparseQ, center_point_.get(), query, &data_, query->GetK(), &ids);
falconn_table_sparse_->find_k_nearest_neighbors(sparseQ, center_point_.get(), query, &this->data_, query->GetK(), &ids);
} else {
DenseFalconnPoint denseQ(dim_);
createDenseDataPoint(query->QueryObject(), denseQ, norm_data_);
falconn_table_dense_->find_k_nearest_neighbors(denseQ, center_point_.get(), query, &data_, query->GetK(), &ids);
falconn_table_dense_->find_k_nearest_neighbors(denseQ, center_point_.get(), query, &this->data_, query->GetK(), &ids);
}
}
}
Expand Down
18 changes: 9 additions & 9 deletions similarity_search/src/method/lsh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace similarity {
template <typename dist_t, typename lsh_t, typename paramcreator_t>
LSH<dist_t, lsh_t, paramcreator_t>::LSH(const Space<dist_t>& space,
const ObjectVector& data,
int P) : data_(data), p_(P) {
int P) : Index<dist_t>(data), p_(P) {

}

Expand All @@ -43,27 +43,27 @@ void LSH<dist_t, lsh_t, paramcreator_t>::CreateIndex(const AnyParams& IndexParam
pmgr.GetParamOptional("W", LSH_W, 20);
pmgr.GetParamOptional("M", LSH_M, 20);
pmgr.GetParamOptional("L", LSH_L, 50);
pmgr.GetParamOptional("H", LSH_H, data_.size() + 1);
pmgr.GetParamOptional("H", LSH_H, this->data_.size() + 1);

int is_float = std::is_same<float,dist_t>::value;
CHECK_MSG(is_float, "LSH works only for single-precision numbers");
CHECK_MSG(sizeof(dist_t) == sizeof(float), "LSH works only for single-precision numbers");
CHECK_MSG(!data_.empty(), "The data set shouldn't be empty");
CHECK_MSG(!this->data_.empty(), "The data set shouldn't be empty");
CHECK_MSG(p_ == 1 || p_ == 2, "The value of the space selector should be either 1 or 2!");

const size_t datalength = data_[0]->datalength();
const size_t datalength = this->data_[0]->datalength();

LOG(LIB_INFO) << "W (window size (used only for LSHCauchy and LSHGaussian)) : " << LSH_W;
LOG(LIB_INFO) << "M (# of hash functions) : " << LSH_M;
LOG(LIB_INFO) << "L (# of hash tables) : " << LSH_L;
LOG(LIB_INFO) << "H (# hash table size) : " << LSH_H;

const int dim = static_cast<int>(datalength / sizeof(float));
matrix_ = new lshkit::FloatMatrix(dim, static_cast<int>(data_.size()));
matrix_ = new lshkit::FloatMatrix(dim, static_cast<int>(this->data_.size()));

for (size_t i = 0; i < data_.size(); ++i) {
CHECK(datalength == data_[i]->datalength());
const float* x = reinterpret_cast<const float*>(data_[i]->data());
for (size_t i = 0; i < this->data_.size(); ++i) {
CHECK(datalength == this->data_[i]->datalength());
const float* x = reinterpret_cast<const float*>(this->data_[i]->data());
for (int j = 0; j < dim; ++j) {
(*matrix_)[i][j] = x[j];
}
Expand Down Expand Up @@ -115,7 +115,7 @@ void LSH<dist_t, lsh_t, paramcreator_t>::Search(KNNQuery<dist_t>* query, IdType)
const lshkit::Topk<uint32_t>& knn = query_scanner.topk();
for (size_t i = 0; i < knn.size(); ++i) {
if (knn[i].key != std::numeric_limits<uint32_t>::max()) {
query->CheckAndAddToResult(knn[i].dist, data_[knn[i].key]);
query->CheckAndAddToResult(knn[i].dist, this->data_[knn[i].key]);
}
}
}
Expand Down
22 changes: 11 additions & 11 deletions similarity_search/src/method/lsh_multiprobe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace similarity {

template <typename dist_t>
MultiProbeLSH<dist_t>::MultiProbeLSH(const Space<dist_t>& space,
const ObjectVector& data) : data_(data) {
const ObjectVector& data) : Index<dist_t>(data) {
}

template <typename dist_t>
Expand All @@ -43,7 +43,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {

pmgr.GetParamOptional("M", LSH_M, 20);
pmgr.GetParamOptional("L", LSH_L, 50);
pmgr.GetParamOptional("H", LSH_H, data_.size() + 1);
pmgr.GetParamOptional("H", LSH_H, this->data_.size() + 1);
pmgr.GetParamOptional("W", LSH_W, 20);
pmgr.GetParamOptional("T", LSH_T, 10);
pmgr.GetParamOptional("tuneK", LSH_TuneK, 1);
Expand All @@ -52,7 +52,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {

// For FitData():
// number of points to use
unsigned N1 = data_.size();
unsigned N1 = this->data_.size();
// number of pairs to sample
unsigned P = 10000;
// number of queries to sample
Expand Down Expand Up @@ -82,7 +82,7 @@ void MultiProbeLSH<dist_t>::CreateIndex(const AnyParams& IndexParams) {
unsigned F = 10;
// For MPLSHTune():
// dataset size
unsigned N2 = data_.size();
unsigned N2 = this->data_.size();
// desired recall

CreateIndexInternal(
Expand Down Expand Up @@ -113,13 +113,13 @@ void MultiProbeLSH<dist_t>::CreateIndexInternal(
int is_float = std::is_same<float,dist_t>::value;
CHECK(is_float);
CHECK(sizeof(dist_t) == sizeof(float));
CHECK(!data_.empty());
const size_t datalength = data_[0]->datalength();
CHECK(!this->data_.empty());
const size_t datalength = this->data_[0]->datalength();
dim_ = static_cast<int>(datalength / sizeof(float));
matrix_ = new lshkit::FloatMatrix(dim_, static_cast<int>(data_.size()));
for (size_t i = 0; i < data_.size(); ++i) {
CHECK(datalength == data_[i]->datalength());
const float* x = reinterpret_cast<const float*>(data_[i]->data());
matrix_ = new lshkit::FloatMatrix(dim_, static_cast<int>(this->data_.size()));
for (size_t i = 0; i < this->data_.size(); ++i) {
CHECK(datalength == this->data_[i]->datalength());
const float* x = reinterpret_cast<const float*>(this->data_[i]->data());
for (int j = 0; j < dim_; ++j) {
(*matrix_)[i][j] = x[j];
}
Expand Down Expand Up @@ -188,7 +188,7 @@ void MultiProbeLSH<dist_t>::Search(KNNQuery<dist_t>* query, IdType) const {
const lshkit::Topk<uint32_t>& knn = query_scanner.topk();
for (size_t i = 0; i < knn.size(); ++i) {
if (knn[i].key != std::numeric_limits<uint32_t>::max()) {
query->CheckAndAddToResult(sqrt(knn[i].dist), data_[knn[i].key]);
query->CheckAndAddToResult(sqrt(knn[i].dist), this->data_[knn[i].key]);
}
}
}
Expand Down
27 changes: 14 additions & 13 deletions similarity_search/src/method/nndes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ NNDescentMethod<dist_t>::NNDescentMethod(
bool PrintProgress,
const Space<dist_t>& space,
const ObjectVector& data) :
space_(space), data_(data), PrintProgress_(PrintProgress),
Index<dist_t>(data),
space_(space), PrintProgress_(PrintProgress),
controlQty_(0), // default value from Wei Dong's code
nndesOracle_(space, data)
{}
Expand All @@ -92,17 +93,17 @@ void NNDescentMethod<dist_t>::CreateIndex(const AnyParams& IndexParams) {

LOG(LIB_INFO) << "Starting NN-Descent...";

nndesObj_.reset(new NNDescent<SpaceOracle>(data_.size(), // N
nndesObj_.reset(new NNDescent<SpaceOracle>(this->data_.size(), // N
NN_, //K
rho_, //S,
nndesOracle_, GRAPH_BOTH));

float total = float(data_.size()) * (data_.size() - 1) / 2;
float total = float(this->data_.size()) * (this->data_.size() - 1) / 2;
cout.precision(5);
cout.setf(ios::fixed);
for (int it = 0; it < iterationQty_; ++it) {
int t = nndesObj_->iterate(PrintProgress_);
float rate = float(t) / (NN_ * data_.size());
float rate = float(t) / (NN_ * this->data_.size());

// TODO @leo computation of recall needs to be re-written, can't use original Wei Dong's code
/*
Expand Down Expand Up @@ -140,7 +141,7 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
const vector<KNN> &nn = nndesObj_->getNN();

#if USE_BITSET_FOR_SEARCHING
vector<bool> visitedBitset(data_.size());
vector<bool> visitedBitset(this->data_.size());
#else
unordered_set <IdType> visitedNodes;
#endif
Expand All @@ -149,12 +150,12 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
/**
* Search of most k-closest elements to the query.
*/
IdType randPoint = RandomInt() % data_.size();
IdType randPoint = RandomInt() % this->data_.size();

priority_queue <dist_t> closestDistQueue; //The set of all elements which distance was calculated
priority_queue <EvaluatedNode> candidateSet; //the set of elements which we can use to evaluate

const Object* currObj = data_[randPoint];
const Object* currObj = this->data_[randPoint];
dist_t d = query->DistanceObjLeft(currObj);
query->CheckAndAddToResult(d, currObj);

Expand Down Expand Up @@ -195,7 +196,7 @@ void NNDescentMethod<dist_t>::SearchSmallWorld(KNNQuery<dist_t>* query) const {
visitedNodes.insert(currNew);
#endif

currObj = data_[currNew];
currObj = this->data_[currNew];
d = query->DistanceObjLeft(currObj);
query->CheckAndAddToResult(d, currObj);
EvaluatedNode evE1(-d, currNew);
Expand All @@ -217,10 +218,10 @@ void NNDescentMethod<dist_t>::SearchGreedy(KNNQuery<dist_t>* query) const {
const vector<KNN> &nn = nndesObj_->getNN();

for (size_t i=0; i < initSearchAttempts_; i++) {
IdType curr = RandomInt() % data_.size();
IdType curr = RandomInt() % this->data_.size();

dist_t currDist = query->DistanceObjLeft(data_[curr]);
query->CheckAndAddToResult(currDist, data_[curr]);
dist_t currDist = query->DistanceObjLeft(this->data_[curr]);
query->CheckAndAddToResult(currDist, this->data_[curr]);


IdType currOld;
Expand All @@ -231,8 +232,8 @@ void NNDescentMethod<dist_t>::SearchGreedy(KNNQuery<dist_t>* query) const {
for (const KNNEntry&e: nn[currOld]) {
IdType currNew = e.key;
if (currNew != KNNEntry::BAD) {
dist_t currDistNew = query->DistanceObjLeft(data_[currNew]);
query->CheckAndAddToResult(currDistNew, data_[currNew]);
dist_t currDistNew = query->DistanceObjLeft(this->data_[currNew]);
query->CheckAndAddToResult(currDistNew, this->data_[currNew]);
if (currDistNew < currDist) {
curr = currNew;
currDist = currDistNew;
Expand Down

0 comments on commit df4fdbd

Please sign in to comment.