From 805a53a69ed04ee6aefa384ea53281a2a09f6c44 Mon Sep 17 00:00:00 2001 From: searchivairus Date: Mon, 29 Jan 2018 02:57:11 -0500 Subject: [PATCH] Further improving integration tests (within #154) --- .../factory/method/blkmax_inverted_index.h | 2 +- .../factory/method/simple_inverted_index.h | 2 +- .../factory/method/wand_inverted_index.h | 2 +- .../include/method/blkmax_inverted_index.h | 4 +- .../include/method/simple_inverted_index.h | 7 +- .../include/method/wand_inverted_index.h | 5 +- .../src/method/simple_inverted_index.cc | 13 +- similarity_search/test/test_integr.cc | 364 ++++++++++-------- similarity_search/test/test_integr_util.h | 183 +++++---- 9 files changed, 330 insertions(+), 252 deletions(-) diff --git a/similarity_search/include/factory/method/blkmax_inverted_index.h b/similarity_search/include/factory/method/blkmax_inverted_index.h index 5309812..7407dc9 100644 --- a/similarity_search/include/factory/method/blkmax_inverted_index.h +++ b/similarity_search/include/factory/method/blkmax_inverted_index.h @@ -30,7 +30,7 @@ Index* CreateBlockMaxInvIndex(bool PrintProgress, const string& SpaceType, Space& space, const ObjectVector& DataObjects) { - return new BlockMaxInvIndex(space, DataObjects); + return new BlockMaxInvIndex(PrintProgress, space, DataObjects); } /* diff --git a/similarity_search/include/factory/method/simple_inverted_index.h b/similarity_search/include/factory/method/simple_inverted_index.h index af8e872..ce2634a 100644 --- a/similarity_search/include/factory/method/simple_inverted_index.h +++ b/similarity_search/include/factory/method/simple_inverted_index.h @@ -30,7 +30,7 @@ Index* CreateSimplInvIndex(bool PrintProgress, const string& SpaceType, Space& space, const ObjectVector& DataObjects) { - return new SimplInvIndex(space, DataObjects); + return new SimplInvIndex(PrintProgress, space, DataObjects); } /* diff --git a/similarity_search/include/factory/method/wand_inverted_index.h b/similarity_search/include/factory/method/wand_inverted_index.h index e827263..5cbaea1 100644 --- a/similarity_search/include/factory/method/wand_inverted_index.h +++ b/similarity_search/include/factory/method/wand_inverted_index.h @@ -30,7 +30,7 @@ Index* CreateWANDInvIndex(bool PrintProgress, const string& SpaceType, Space& space, const ObjectVector& DataObjects) { - return new WandInvIndex(space, DataObjects); + return new WandInvIndex(PrintProgress, space, DataObjects); } /* diff --git a/similarity_search/include/method/blkmax_inverted_index.h b/similarity_search/include/method/blkmax_inverted_index.h index 3196d00..7c9b0fc 100644 --- a/similarity_search/include/method/blkmax_inverted_index.h +++ b/similarity_search/include/method/blkmax_inverted_index.h @@ -31,8 +31,8 @@ using std::string; template class BlockMaxInvIndex : public WandInvIndex { public: - BlockMaxInvIndex(Space& space, - const ObjectVector& data) : WandInvIndex(space, data) { + BlockMaxInvIndex(bool printProgress, Space& space, + const ObjectVector& data) : WandInvIndex(printProgress, space, data) { } void CreateIndex(const AnyParams& IndexParams) override; diff --git a/similarity_search/include/method/simple_inverted_index.h b/similarity_search/include/method/simple_inverted_index.h index 7dbf61b..ffbf839 100644 --- a/similarity_search/include/method/simple_inverted_index.h +++ b/similarity_search/include/method/simple_inverted_index.h @@ -38,8 +38,10 @@ class SimplInvIndex : public Index { * which are guaranteed to be be valid during testing. * So, we can memorize them safely. */ - SimplInvIndex(Space& space, - const ObjectVector& data) : Index(data), + SimplInvIndex(bool printProgress, + Space& space, + const ObjectVector& data) : printProgress_(printProgress), + Index(data), pSpace_(dynamic_cast(&space)) { if (pSpace_ == nullptr) { PREPARE_RUNTIME_ERR(err) << @@ -119,6 +121,7 @@ class SimplInvIndex : public Index { : post_(&pl), post_pos_(0), qval_(qval), qval_x_docval_(qval_x_docval) {} }; + bool printProgress_; SpaceSparseNegativeScalarProductFast* pSpace_; std::unordered_map> index_; // disable copy and assign diff --git a/similarity_search/include/method/wand_inverted_index.h b/similarity_search/include/method/wand_inverted_index.h index d363108..c994548 100644 --- a/similarity_search/include/method/wand_inverted_index.h +++ b/similarity_search/include/method/wand_inverted_index.h @@ -32,8 +32,9 @@ class WandInvIndex : public SimplInvIndex { * which are guaranteed to be be valid during testing. * So, we can memorize them safely. */ - WandInvIndex(Space& space, - const ObjectVector& data) : SimplInvIndex(space, data) { + WandInvIndex(bool printProgress, + Space& space, + const ObjectVector& data) : SimplInvIndex(printProgress, space, data) { } void CreateIndex(const AnyParams& IndexParams) override; diff --git a/similarity_search/src/method/simple_inverted_index.cc b/similarity_search/src/method/simple_inverted_index.cc index 3f05bbf..770233f 100644 --- a/similarity_search/src/method/simple_inverted_index.cc +++ b/similarity_search/src/method/simple_inverted_index.cc @@ -159,14 +159,16 @@ void SimplInvIndex::CreateIndex(AnyParamManager& ParamManager) { LOG(LIB_INFO) << "Collecting dictionary stat"; { - ProgressDisplay pbar(this->data_.size(), cerr); + unique_ptr pbar(printProgress_ ? + new ProgressDisplay(this->data_.size(), cerr) : nullptr); for (const Object* o : this->data_) { tmp_vect.clear(); UnpackSparseElements(o->data(), o->datalength(), tmp_vect); for (const auto& e : tmp_vect) dict_qty[e.id_] ++; - ++pbar; + if (pbar) ++(*pbar); } + if (pbar) pbar->finish(); } LOG(LIB_INFO) << "Actually creating the index"; @@ -182,7 +184,8 @@ void SimplInvIndex::CreateIndex(AnyParamManager& ParamManager) { } { - ProgressDisplay pbar(this->data_.size(), cerr); + unique_ptr pbar(printProgress_ ? + new ProgressDisplay(this->data_.size(), cerr) : nullptr); // Fill posting lists for (size_t did = 0; did < this->data_.size(); ++did) { @@ -207,8 +210,10 @@ void SimplInvIndex::CreateIndex(AnyParamManager& ParamManager) { CHECK(curr_pos < pl.qty_); pl.entries_[curr_pos] = PostEntry(did, e.val_); } - ++pbar; + if (pbar) ++(*pbar); } + + if (pbar) pbar->finish(); } #ifdef SANITY_CHECKS // Sanity check diff --git a/similarity_search/test/test_integr.cc b/similarity_search/test/test_integr.cc index 1f6aeda..bda47e2 100644 --- a/similarity_search/test/test_integr.cc +++ b/similarity_search/test/test_integr.cc @@ -58,195 +58,212 @@ using std::stringstream; #define TEST_SET_QTY 20 #define MAX_NUM_QUERY 100 +#define INDEX_FILE_NAME "index.tmp" + +#define TEST_HNSW 1 +#define TEST_SW_GRAPH 1 +#define TEST_IR 1 +#define TEST_OTHER 1 + vector vTestCaseDesc = { - MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "simple_invindx", "", "", - 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), - MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "wand_invindx", "", "", - 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), - MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "blkmax_invindx", "", "", - 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), -#if 1 - MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", +#if (TEST_HNSW) + MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", 10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 6, 12), - MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "sw-graph", "efConstruction=50,NN=10", "efSearch=50", - 10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 5, 10), - MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse", "sparse_5K.txt", "hnsw", "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", + MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", 10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 6, 12), - MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse_fast", "sparse_5K.txt", "sw-graph", "efConstruction=50,NN=10", "efSearch=50", + MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", + 10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60), + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", + 10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60), +#endif + +#if (TEST_SW_GRAPH) + MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "sw-graph", true, "efConstruction=50,NN=10", "efSearch=50", 10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 5, 10), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", "clusterType=clarans,centerQty=10", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse_fast", "sparse_5K.txt", "sw-graph", true, "efConstruction=50,NN=10", "efSearch=50", + 10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 5, 10), +#endif + + +#if (TEST_IR) + MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "simple_invindx", false, "", "", + 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), + MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "wand_invindx", false, "", "", + 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), + MethodTestCase(DIST_TYPE_FLOAT, "negdotprod_sparse_fast", "sparse_5K.txt", "blkmax_invindx", false, "", "", + 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 450, 510), +#endif + +#if (TEST_OTHER) + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", false, "clusterType=clarans,centerQty=10", "dbScanFrac=0.1", 10 /* KNN-10 */, 0 /* no range search */ , 0.85, 0.95, 0.01, 5, 2, 7), // ************** Tests for non-metric clustering *********** // - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", "clusterType=firmal,centerQty=10", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", false, "clusterType=firmal,centerQty=10", "dbScanFrac=0.1", 10 /* KNN-10 */, 0 /* no range search */ , 0.8, 0.92, 0.1, 20, 2.5, 6), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", "clusterType=reduct_clarans,centerQty=10", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", false, "clusterType=reduct_clarans,centerQty=10", "dbScanFrac=0.1", 10 /* KNN-10 */, 0 /* no range search */ , 0.85, 0.95, 0.01, 5, 2, 7), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "hnsw", "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", - 10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "hnsw", "efConstruction=50,M=10,skip_optimized_index=1", "ef=50", - 10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60), // *************** NEW versions of permutation & projection-based filtering method tests ******************** // - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", "", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", false, "", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0, 0, 1, 1), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", "", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", false, "", "", 0 /* no-knn search */, 0.2 /* range 0.2 */ , 1.0, 1.0, 0, 0, 1, 1), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", "multiThread=1,threadQty=4", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", false, "multiThread=1,threadQty=4", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0, 0, 1, 1), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", "multiThread=1,threadQty=4", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", false, "multiThread=1,threadQty=4", "", 0 /* no-knn search */, 0.2 /* range 0.2 */ , 1.0, 1.0, 0, 0, 1, 1), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "small_world_rand", "NN=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "small_world_rand", true, "NN=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.0, 0, 1.0, 40, 52), // 4 different types of projections - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=perm,projDim=4", "dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=perm,projDim=4", "dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=rand,projDim=4", "dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=rand,projDim=4", "dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=fastmap,projDim=4", "dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=fastmap,projDim=4", "dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=randrefpt,projDim=4", "dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=randrefpt,projDim=4", "dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), // Proj. VP-tree - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_vptree", "projType=perm,projDim=4", "alphaLeft=2,alphaRight=2,dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_vptree", false, "projType=perm,projDim=4", "alphaLeft=2,alphaRight=2,dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt","pp-index", "numPivot=4,prefixLength=4", "minCandidate=10000", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt","pp-index", false, "numPivot=4,prefixLength=4", "minCandidate=10000", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", false, "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), // Binarized permutations - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", "numPivot=32", "dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", false, "numPivot=32", "dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_bin_vptree", "numPivot=32", "alphaLeft=2,alphaRight=2,dbScanFrac=1.0", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_bin_vptree", false, "numPivot=32", "alphaLeft=2,alphaRight=2,dbScanFrac=1.0", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01), // 4 different types of projections - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=perm,projDim=4", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=perm,projDim=4", "dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.4, 0.7, 0.5, 4, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=rand,projDim=4", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=rand,projDim=4", "dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.01, 0.0, 0.2, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=fastmap,projDim=4", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=fastmap,projDim=4", "dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.01, 0.0, 0.2, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", "projType=randrefpt,projDim=4", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_incsort", false, "projType=randrefpt,projDim=4", "dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.01, 0.0, 0.2, 8, 12), // Proj. VP-tree - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_vptree", "projType=perm,projDim=4", "alphaLeft=2,alphaRight=2,dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "proj_vptree", false, "projType=perm,projDim=4", "alphaLeft=2,alphaRight=2,dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.4, 0.7, 0.5, 4, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt","pp-index", "numPivot=4,prefixLength=4", "minCandidate=100", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt","pp-index", false, "numPivot=4,prefixLength=4", "minCandidate=100", 1 /* KNN-1 */, 0 /* no range search */ , 0.8, 1.0, 0.1, 2, 3, 8), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", false, "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.95, 1.0, 0, 0.5, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", "numPivot=32,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=32,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8", 1 /* KNN-1 */, 0 /* no range search */ , 0.6, 0.8, 1, 4, 22, 37), // Binarized - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", "numPivot=32", "dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", true, "numPivot=32", "dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.0, 0.01, 0.3, 8, 12), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_bin_vptree", "numPivot=32", "alphaLeft=2,alphaRight=2,dbScanFrac=0.1", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_bin_vptree", true, "numPivot=32", "alphaLeft=2,alphaRight=2,dbScanFrac=0.1", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.0, 0.01, 0.5, 8, 12), // *************** omedrank tests ******************** // - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "omedrank", "numPivot=4,chunkIndexSize=16536", "dbScanFrac=0.01,minFreq=0.5", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "omedrank", false, "numPivot=4,chunkIndexSize=16536", "dbScanFrac=0.01,minFreq=0.5", 1 /* KNN-1 */, 0 /* no range search */ , 0.7, 0.97, 0.1, 3, 70, 120), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "omedrank", "numPivot=4,chunkIndexSize=16536", "dbScanFrac=0.01,minFreq=0.5", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "omedrank", false, "numPivot=4,chunkIndexSize=16536", "dbScanFrac=0.01,minFreq=0.5", 1 /* KNN-1 */, 0 /* no range search */ , 0.6, 0.9, 0.1, 3, 70, 120), // *************** VP-tree tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 40, 80), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", 1 /* KNN-1 */, 0 /* no range search */ , 0.93, 0.97, 0.03, 0.09, 120, 190), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 1.5, 2.5), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", 1 /* KNN-1 */, 0 /* no range search */ , 0.98, 1.0, 0.0, 0.02, 2.8, 5.5), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 20, 30), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", 10 /* KNN-10 */, 0 /* no range search */ , 0.93, 0.96, 0.0, 0.02, 56, 80), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 1.1, 1.6), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final128_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "alphaLeft=2,alphaRight=2", 10 /* KNN-10 */, 0 /* no range search */ , 0.98, 0.999, 0.0, 0.01, 1.5, 2.5), // range - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 1.0, 1.0, 0.0, 0.0, 23, 30), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", "chunkBucket=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "vptree", false, "chunkBucket=1,bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5 */ , 1.0, 1.0, 0.0, 0.0, 2.4, 4), // *************** MVP-tree tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 120, 140), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 40, 50), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "maxLeavesToVisit=10", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "maxLeavesToVisit=10", 1 /* KNN-1 */, 0 /* no range search */ , 0.82, 0.9, 0.2, 3.5, 230, 250), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "maxLeavesToVisit=20", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "maxLeavesToVisit=20", 10 /* KNN-10 */, 0 /* no range search */ , 0.75, 0.82, 0.2, 2.0, 85, 100), // range - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 1.0, 1.0, 0.0, 0.0, 40, 55), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", "maxPathLen=4,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mvptree", false, "maxPathLen=4,bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 1.0, 1.0, 0.0, 0.0, 3, 4), // *************** GH-tree tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 28, 35), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 8, 10.2), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "maxLeavesToVisit=10", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "maxLeavesToVisit=10", 1 /* KNN-1 */, 0 /* no range search */ , 0.8, 0.87, 0.2, 1.5, 95, 115), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "maxLeavesToVisit=20", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "maxLeavesToVisit=20", 10 /* KNN-10 */, 0 /* no range search */ , 0.75, 0.82, 0.1, 1.0, 52, 62), // range - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 1.0, 1.0, 0.0, 0.0, 10, 16), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "ghtree", false, "bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 1.0, 1.0, 0.0, 0.0, 1, 1.2), // *************** SA-tree tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", false, "bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 25, 33), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", false, "bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 10, 25), // range - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", false, "bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 1.0, 1.0, 0.0, 0.0, 13, 18), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "satree", false, "bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 1.0, 1.0, 0.0, 0.0, 2.8, 3.4), // *************** List of clusters tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 9.5, 11.5), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 1.0, 1.0, 0.0, 0.0, 7.5, 8.5), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "maxLeavesToVisit=10", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "maxLeavesToVisit=10", 1 /* KNN-1 */, 0 /* no range search */ , 0.78, 0.9, 0.2, 1.5, 9.5, 11.5), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "maxLeavesToVisit=20", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "maxLeavesToVisit=20", 10 /* KNN-10 */, 0 /* no range search */ , 0.85, 0.97, 0.05, 0.7, 8.5, 10.5), // range - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 1.0, 1.0, 0.0, 0.0, 8, 10), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", "strategy=random,useBucketSize=1,bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "list_clusters", false, "strategy=random,useBucketSize=1,bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 1.0, 1.0, 0.0, 0.0, 2.4, 3.4), // *************** bbtree tests ******************** // @@ -257,46 +274,46 @@ vector vTestCaseDesc = { * need to debug it in the future. * Therefore, we expect a slightly imperfect recall sometimes. */ - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.0, 9.5, 11.5), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.0, 5.5, 8), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "maxLeavesToVisit=10", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=10", 1 /* KNN-1 */, 0 /* no range search */ , 0.75, 0.85, 0.3, 1.5, 48, 52), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "maxLeavesToVisit=20", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=20", 10 /* KNN-10 */, 0 /* no range search */ , 0.7, 0.78, 0.3, 1.6, 28, 37), // range - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 0 /* no KNN */, 0.1 /* range search radius 0.1 */ , 0.999, 1.0, 0.0, 0.0, 4.5, 6.5), - MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", "bucketSize=10", "", + MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 0 /* no KNN */, 0.5 /* range search radius 0.5*/ , 0.999, 1.0, 0.0, 0.0, 1.2, 2.4), #ifdef WITH_EXTRAS // *************** multi-probe LSH tests ******************** // // knn - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_multiprobe", "desiredRecall=0.5,tuneK=1,T=5,L=25,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_multiprobe", false, "desiredRecall=0.5,tuneK=1,T=5,L=25,H=16535", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.45, 0.6, 45, 80, 70, 130), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_multiprobe", "desiredRecall=0.5,tuneK=10,T=5,L=25,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_multiprobe", false, "desiredRecall=0.5,tuneK=10,T=5,L=25,H=16535", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.45, 0.6, 10, 40, 70, 130), // *************** Guassian LSH tests ******************** // - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_gaussian", "W=2,L=5,M=40,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_gaussian", false, "W=2,L=5,M=40,H=16535", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.85, 0.95, 0.1, 40, 70, 130), - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_gaussian", "W=2,L=5,M=40,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "lsh_gaussian", false, "W=2,L=5,M=40,H=16535", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.68, 0.82, 0.1, 50, 70, 130), // *************** Cauchy LSH tests ******************** // - MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_cauchy", "W=2,L=5,M=10,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_cauchy", false, "W=2,L=5,M=10,H=16535", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.7, 0.9, 0.1, 50, 70, 130), - MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_cauchy", "W=2,L=5,M=10,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_cauchy", false, "W=2,L=5,M=10,H=16535", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.5, 0.8, 0.1, 50, 70, 120), // *************** Thresholding LSH tests ******************** // - MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_threshold", "L=5,M=60,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_threshold", false, "L=5,M=60,H=16535", "", 1 /* KNN-1 */, 0 /* no range search */ , 0.8, 0.99, 0.1, 50, 40, 70), - MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_threshold", "L=5,M=60,H=16535", "", + MethodTestCase(DIST_TYPE_FLOAT, "l1", "final8_10K.txt", "lsh_threshold", false, "L=5,M=60,H=16535", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.65, 0.85, 0.1, 50, 40, 70), // Old NN-descent - MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "nndes", "NN=10,rho=0.5,delta=0.001", "initSearchAttempts=10", + MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "nndes", false, "NN=10,rho=0.5,delta=0.001", "initSearchAttempts=10", 1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.0, 0, 1.0, 5, 12), #endif #endif @@ -320,9 +337,9 @@ int main(int ac, char* av[]) { set setRange; for (const auto& testCase: vTestCaseDesc) { - setDistType.insert(testCase.mDistType); - setSpaceType.insert(testCase.mSpaceType); - setDataSet.insert(testCase.mDataSet); + setDistType.insert(testCase.distType_); + setSpaceType.insert(testCase.spaceType_); + setDataSet.insert(testCase.dataSet_); if (testCase.mKNN > 0) setKNN.insert(testCase.mKNN); if (testCase.mRange > 0) @@ -333,84 +350,95 @@ int main(int ac, char* av[]) { size_t nFail = 0; try { - /* - * 1. Let's iterate over all combinations of data sets, - * distance, and space types. - * 2. For each combination, we select test cases - * with exactly same data set, distance and space type. - * 3. Create an array of arguments in the same format - * as used by the main benchmarking utility. - * 4. Use a standard function to parse these arguments. - */ - for (string dataSet : setDataSet) - for (string distType : setDistType) - for (string spaceType: setSpaceType) { - string dataFile = sampleDataPrefix + dataSet; - - for (unsigned K: setKNN) { - vector vTestCases; - - // Select appropriate test cases to share the same gold-standard data - for (const auto& testCase: vTestCaseDesc) { - if (testCase.mDataSet == dataSet && - testCase.mDistType == distType && - testCase.mSpaceType == spaceType && - testCase.mKNN == K) { - vTestCases.push_back(MethodTestCase(testCase)); + for (int bTestReload = 0; bTestReload < 2; ++bTestReload) { + cout << "Testing index reload: " << (bTestReload == 1) << endl; + cout << "==================================================" << endl; + /* + * 1. Let's iterate over all combinations of data sets, + * distance, and space types. + * 2. For each combination, we select test cases + * with exactly same data set, distance and space type. + * 3. Create an array of arguments in the same format + * as used by the main benchmarking utility. + * 4. Use a standard function to parse these arguments. + */ + for (string dataSet : setDataSet) + for (string distType : setDistType) + for (string spaceType: setSpaceType) { + string dataFile = sampleDataPrefix + dataSet; + + for (unsigned K: setKNN) { + vector vTestCases; + + // Select appropriate test cases to share the same gold-standard data + for (const auto& testCase: vTestCaseDesc) { + if ((bTestReload == 0 || testCase.testReload_) && + testCase.dataSet_ == dataSet && + testCase.distType_ == distType && + testCase.spaceType_ == spaceType && + testCase.mKNN == K) { + vTestCases.push_back(MethodTestCase(testCase)); + } } - } - if (!vTestCases.empty()) { // Not all combinations of spaces, data sets, and search types are non-empty - for (size_t threadQty = 1; threadQty <= MAX_THREAD_QTY; ++threadQty) { - nTest += vTestCases.size(); - nFail += RunOneTest(vTestCases, - distType, - spaceType, - threadQty, - TEST_SET_QTY, - dataFile, - "", - 0, - MAX_NUM_QUERY, - ConvertToString(K), - 0, - "" - ); + if (!vTestCases.empty()) { // Not all combinations of spaces, data sets, and search types are non-empty + for (size_t threadQty = 1; threadQty <= MAX_THREAD_QTY; ++threadQty) { + nTest += vTestCases.size(); + nFail += RunOneTest(vTestCases, + bTestReload == 1, + INDEX_FILE_NAME, + distType, + spaceType, + threadQty, + TEST_SET_QTY, + dataFile, + "", + 0, + MAX_NUM_QUERY, + ConvertToString(K), + 0, + "" + ); + } } } - } - for (float R: setRange) { - vector vTestCases; + for (float R: setRange) { + vector vTestCases; - // Select appropriate test cases to share the same gold-standard data - for (const auto& testCase: vTestCaseDesc) { - if (testCase.mDataSet == dataSet && - testCase.mDistType == distType && - testCase.mSpaceType == spaceType && - testCase.mRange == R) { - vTestCases.push_back(MethodTestCase(testCase)); + // Select appropriate test cases to share the same gold-standard data + for (const auto& testCase: vTestCaseDesc) { + if ((bTestReload == 0 || testCase.testReload_) && + testCase.dataSet_ == dataSet && + testCase.distType_ == distType && + testCase.spaceType_ == spaceType && + testCase.mRange == R) { + vTestCases.push_back(MethodTestCase(testCase)); + } } - } - if (!vTestCases.empty()) { // Not all combinations of spaces, data sets, and search types are non-empty - for (size_t threadQty = 1; threadQty <= MAX_THREAD_QTY; ++threadQty) { - nTest += vTestCases.size(); - nFail += RunOneTest(vTestCases, - distType, - spaceType, - threadQty, - TEST_SET_QTY, - dataFile, - "", - 0, - MAX_NUM_QUERY, - "", - 0, - ConvertToString(R) - ); + if (!vTestCases.empty()) { // Not all combinations of spaces, data sets, and search types are non-empty + for (size_t threadQty = 1; threadQty <= MAX_THREAD_QTY; ++threadQty) { + nTest += vTestCases.size(); + nFail += RunOneTest(vTestCases, + bTestReload == 1, + INDEX_FILE_NAME, + distType, + spaceType, + threadQty, + TEST_SET_QTY, + dataFile, + "", + 0, + MAX_NUM_QUERY, + "", + 0, + ConvertToString(R) + ); + } } + } } @@ -430,9 +458,9 @@ int main(int ac, char* av[]) { LOG(LIB_INFO) << "Time elapsed = " << timer.elapsed() / 1e6; LOG(LIB_INFO) << "Finished at " << LibGetCurrentTime(); - cerr << endl << "==================================================" << endl; - cerr << (nFail ? "FAILURE" : "SUCCESS") << endl; - cerr << "Carried out: " << nTest << " tests. Failed: " << nFail << " tests" << endl; + cout << endl << "==================================================" << endl; + cout << (nFail ? "FAILURE" : "SUCCESS") << endl; + cout << "Carried out: " << nTest << " tests. Failed: " << nFail << " tests" << endl; return nFail ? 1:0; } diff --git a/similarity_search/test/test_integr_util.h b/similarity_search/test/test_integr_util.h index 1df9c2d..bca62b9 100644 --- a/similarity_search/test/test_integr_util.h +++ b/similarity_search/test/test_integr_util.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "utils.h" #include "bunit.h" @@ -45,19 +46,20 @@ using namespace similarity; * 3) Search outcome (recall range, range for the improvement in distance computation) */ struct MethodTestCase { - string mDistType; - string mSpaceType; - string mDataSet; - string mMethodName; - string mIndexParams; - string mQueryTimeParams; - float mRecallMin; - float mRecallMax; - bool mRecallOnly; - float mNumCloserMin; - float mNumCloserMax; - float mImprDistCompMin; - float mImprDistCompMax; + string distType_; + string spaceType_; + string dataSet_; + string methodName_; + bool testReload_; // Test save/load index + string indexParams_; + string queryTypeParams_; + float recallMin_; + float recallMax_; + bool recallOnly_; + float numCloserMin_; + float numCloserMax_; + float imprDistCompMin_; + float imprDistCompMax_; unsigned mKNN; float mRange; @@ -67,6 +69,7 @@ struct MethodTestCase { string spaceType, string dataSet, string methodName, + bool testReload, string indexParams, string queryTimeParams, unsigned knn, @@ -78,22 +81,23 @@ struct MethodTestCase { float imprDistCompMin, float imprDistCompMax, bool recallOnly = false) : - mDistType(distType), - mSpaceType(spaceType), - mDataSet(dataSet), - mMethodName(methodName), - mIndexParams(indexParams), - mQueryTimeParams(queryTimeParams), - mRecallMin(recallMin), - mRecallMax(recallMax), - mRecallOnly(recallOnly), - mNumCloserMin(numCloserMin), - mNumCloserMax(numCloserMax), - mImprDistCompMin(imprDistCompMin), - mImprDistCompMax(imprDistCompMax), + distType_(distType), + spaceType_(spaceType), + dataSet_(dataSet), + methodName_(methodName), + indexParams_(indexParams), + testReload_(testReload), + queryTypeParams_(queryTimeParams), + recallMin_(recallMin), + recallMax_(recallMax), + recallOnly_(recallOnly), + numCloserMin_(numCloserMin), + numCloserMax_(numCloserMax), + imprDistCompMin_(imprDistCompMin), + imprDistCompMax_(imprDistCompMax), mKNN(knn), mRange(range) { - ToLower(mDistType); + ToLower(distType_); ToLower(spaceType); } }; @@ -112,77 +116,77 @@ bool ProcessAndCheckResults( ExpRes.ComputeAll(); - PrintStr = produceHumanReadableReport(config, ExpRes, testCase.mMethodName, testCase.mIndexParams, testCase.mQueryTimeParams); + PrintStr = produceHumanReadableReport(config, ExpRes, testCase.methodName_, testCase.indexParams_, testCase.queryTypeParams_); bool bFail = false; - if (ExpRes.GetRecallAvg() < testCase.mRecallMin) { - cerr << "Failed to meet min recall requirement, expect >= " << testCase.mRecallMin + if (ExpRes.GetRecallAvg() < testCase.recallMin_) { + cerr << "Failed to meet min recall requirement, expect >= " << testCase.recallMin_ << " got " << ExpRes.GetRecallAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; } - if (ExpRes.GetRecallAvg() > testCase.mRecallMax) { - cerr << "Failed to meet max recall requirement, expect <= " << testCase.mRecallMax + if (ExpRes.GetRecallAvg() > testCase.recallMax_) { + cerr << "Failed to meet max recall requirement, expect <= " << testCase.recallMax_ << " got " << ExpRes.GetRecallAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; } - if (ExpRes.GetNumCloserAvg() < testCase.mNumCloserMin) { - cerr << "Failed to meet min # of points closer requirement, expect >= " << testCase.mNumCloserMin + if (ExpRes.GetNumCloserAvg() < testCase.numCloserMin_) { + cerr << "Failed to meet min # of points closer requirement, expect >= " << testCase.numCloserMin_ << " got " << ExpRes.GetNumCloserAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; } - if (ExpRes.GetNumCloserAvg() > testCase.mNumCloserMax) { - cerr << "Failed to meet max # of points closer requirement, expect <= " << testCase.mNumCloserMax + if (ExpRes.GetNumCloserAvg() > testCase.numCloserMax_) { + cerr << "Failed to meet max # of points closer requirement, expect <= " << testCase.numCloserMax_ << " got " << ExpRes.GetNumCloserAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; } - if (ExpRes.GetImprDistCompAvg() < testCase.mImprDistCompMin) { + if (ExpRes.GetImprDistCompAvg() < testCase.imprDistCompMin_) { cerr << "Failed to meet min improvement requirement in the # of distance computations, expect >= " - << testCase.mImprDistCompMin << " got " << ExpRes.GetImprDistCompAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << testCase.imprDistCompMin_ << " got " << ExpRes.GetImprDistCompAvg() << endl + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; } - if (ExpRes.GetImprDistCompAvg() > testCase.mImprDistCompMax) { + if (ExpRes.GetImprDistCompAvg() > testCase.imprDistCompMax_) { cerr << "Failed to meet max improvement requirement in the # of distance computations, expect <= " - << testCase.mImprDistCompMax << " got " << ExpRes.GetImprDistCompAvg() << endl - << " method: " << testCase.mMethodName << " ; " - << " index-time params: " << testCase.mIndexParams << " ; " - << " query-time params: " << testCase.mQueryTimeParams << " ; " - << " data set: " << testCase.mDataSet << " ; " + << testCase.imprDistCompMax_ << " got " << ExpRes.GetImprDistCompAvg() << endl + << " method: " << testCase.methodName_ << " ; " + << " index-time params: " << testCase.indexParams_ << " ; " + << " query-time params: " << testCase.queryTypeParams_ << " ; " + << " data set: " << testCase.dataSet_ << " ; " << " dist value type: " << distType << " ; " << " space type: " << spaceType << endl << cmdStr << endl; bFail = true; @@ -238,9 +242,9 @@ string CreateCmdStr( res << getFirstParam(MAX_NUM_QUERY_PARAM_OPT) << " " << MaxNumQuery << " " << getFirstParam(isRange ? RANGE_PARAM_OPT : KNN_PARAM_OPT) << " " << rangeOrKnnArg << " " - << getFirstParam(METHOD_PARAM_OPT) << " " << testCase.mMethodName << " " - << getFirstParam(INDEX_TIME_PARAMS_PARAM_OPT) << " " << quoteEmpty(testCase.mIndexParams) << " " - << getFirstParam(QUERY_TIME_PARAMS_PARAM_OPT) << " " << quoteEmpty(testCase.mQueryTimeParams); + << getFirstParam(METHOD_PARAM_OPT) << " " << testCase.methodName_ << " " + << getFirstParam(INDEX_TIME_PARAMS_PARAM_OPT) << " " << quoteEmpty(testCase.indexParams_) << " " + << getFirstParam(QUERY_TIME_PARAMS_PARAM_OPT) << " " << quoteEmpty(testCase.queryTypeParams_); return res.str(); }; @@ -251,6 +255,8 @@ string CreateCmdStr( */ template size_t RunTestExper(const vector& vTestCases, + bool bTestReload, + const string& IndexFileNamePrefix, const string& DistType, string SpaceTypeStr, unsigned ThreadTestQty, @@ -269,6 +275,7 @@ size_t RunTestExper(const vector& vTestCases, size_t nFail = 0; + if (!KnnArg.empty()) { if (!SplitStr(KnnArg, knn, ',')) { PREPARE_RUNTIME_ERR(err) << "Wrong format of the knn argument: '" << KnnArg << "' Should be a list of coma-separated int > 0 values."; @@ -340,22 +347,22 @@ size_t RunTestExper(const vector& vTestCases, managerGS.Compute(ThreadTestQty, 0); // Keeping all GS entries, should be Ok here because our data sets are smallish for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) { - const string& MethodName = vTestCases[MethNum].mMethodName; + const string& MethodName = vTestCases[MethNum].methodName_; shared_ptr IndexParams; vector> vQueryTimeParams; - bool recallOnly = vTestCases[MethNum].mRecallOnly; + bool recallOnly = vTestCases[MethNum].recallOnly_; { vector desc; - ParseArg(vTestCases[MethNum].mIndexParams, desc); + ParseArg(vTestCases[MethNum].indexParams_, desc); IndexParams = shared_ptr(new AnyParams(desc)); } { vector desc; - ParseArg(vTestCases[MethNum].mQueryTimeParams, desc); + ParseArg(vTestCases[MethNum].queryTypeParams_, desc); vQueryTimeParams.push_back(shared_ptr(new AnyParams(desc))); } @@ -381,6 +388,32 @@ size_t RunTestExper(const vector& vTestCases, IndexPtr->CreateIndex(*IndexParams); + if (bTestReload) { + LOG(LIB_INFO) << "Saving the index" ; + + string indexLocAdd = "_" + ConvertToString(TestSetId); + string fullIndexName = IndexFileNamePrefix + indexLocAdd; + + if (DoesFileExist(fullIndexName)) { + CHECK_MSG(std::remove(fullIndexName.c_str()) == 0, + "Failed to delete file '" + fullIndexName + "'") + } + + IndexPtr->SaveIndex(fullIndexName); + + IndexPtr.reset( + MethodFactoryRegistry::Instance(). + CreateMethod(false /* don't print progress */, + MethodName, + SpaceType, config.GetSpace(), + config.GetDataObjects()) + ); + + LOG(LIB_INFO) << "Loading the index" ; + + IndexPtr->LoadIndex(fullIndexName); + } + LOG(LIB_INFO) << "=============================================="; const double vmsize_after = mem_usage_measure.get_vmsize(); @@ -507,6 +540,8 @@ size_t RunTestExper(const vector& vTestCases, } inline bool RunOneTest(const vector& vTestCases, + bool bTestReload, + string IndexFileNamePrefix, string DistType, string SpaceTypeStr, unsigned ThreadTestQty, @@ -522,6 +557,8 @@ inline bool RunOneTest(const vector& vTestCases, ToLower(DistType); if (DIST_TYPE_INT == DistType) { bTestRes = RunTestExper(vTestCases, + bTestReload, + IndexFileNamePrefix, DistType, SpaceTypeStr, ThreadTestQty, @@ -536,6 +573,8 @@ inline bool RunOneTest(const vector& vTestCases, ); } else if (DIST_TYPE_FLOAT == DistType) { bTestRes = RunTestExper(vTestCases, + bTestReload, + IndexFileNamePrefix, DistType, SpaceTypeStr, ThreadTestQty, @@ -550,6 +589,8 @@ inline bool RunOneTest(const vector& vTestCases, ); } else if (DIST_TYPE_DOUBLE == DistType) { bTestRes = RunTestExper(vTestCases, + bTestReload, + IndexFileNamePrefix, DistType, SpaceTypeStr, ThreadTestQty,