Skip to content

Commit

Permalink
Now we reset the random seed to zero before testing each method,
Browse files Browse the repository at this point in the history
which should further improve stability #222.
Also, the commit that changed how the random number generator is
obtained and reset has fixed the FALCONN integration tests (#154).
  • Loading branch information
searchivairus committed Feb 3, 2018
1 parent 5eae3a5 commit 8a20700
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
55 changes: 31 additions & 24 deletions similarity_search/test/test_integr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,25 @@ using std::stringstream;
#define TEST_HNSW 1
#define TEST_SW_GRAPH 1
#define TEST_IR 1
#define TEST_NAPP 1
#define TEST_OTHER 1
// TODO something is wrong with FALCONN,
// When we run it as a single thing in the binary, it works fine,
// but crashes when we run it jointly with other methods.
#define TEST_FALCONN 0
#define TEST_FALCONN 1

vector<MethodTestCase> vTestCaseDesc = {
#if (TEST_HNSW)
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50",
// Make sure, it works with huge M
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=400", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.98, 0.9999, 0.0, 1, 1.8, 2.2),
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 6, 12),
MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50",
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 6, 12),
MethodTestCase(DIST_TYPE_FLOAT, "angulardist_sparse", "sparse_5K.txt", "hnsw", true, "efConstruction=50,M=10", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.88, 0.96, 0.0, 1, 6, 12),
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60),
10 /* KNN-10 */, 0 /* no range search */ , 0.96, 1, 0, 0.1, 40, 60),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "hnsw", true, "efConstruction=50,M=10,skip_optimized_index=1", "ef=50",
10 /* KNN-10 */, 0 /* no range search */ , 0.97, 1, 0, 0.1, 40, 60),
10 /* KNN-10 */, 0 /* no range search */ , 0.96, 1, 0, 0.1, 40, 60),
#endif

#if (TEST_SW_GRAPH)
Expand All @@ -98,6 +101,26 @@ vector<MethodTestCase> vTestCaseDesc = {
10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.001, 395, 510),
#endif

// *************** FALCONN test ***************************** //
#if (TEST_FALCONN)
#ifdef WITH_EXTRAS
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "falconn", false, "num_hash_tables=20,num_hash_bits=7,feature_hashing_dimension=128,use_falconn_dist=0", "num_probes=20",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.79, 0.5, 1.5, 5.75, 6.75),
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "falconn", false, "num_hash_tables=1,num_hash_bits=11,use_falconn_dist=0", "num_probes=1",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.4, 3.5, 4, 5.5),
#endif
#endif

#if (TEST_NAPP)
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=32,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.6, 0.8, 2.2, 2.7, 28, 33),
#endif


#if (TEST_OTHER)
MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", false, "clusterType=clarans,centerQty=10", "dbScanFrac=0.1",
10 /* KNN-10 */, 0 /* no range search */ , 0.85, 0.95, 0.01, 5, 2, 7),
Expand All @@ -106,8 +129,6 @@ vector<MethodTestCase> vTestCaseDesc = {
10 /* KNN-10 */, 0 /* no range search */ , 0.8, 0.92, 0.1, 20, 2.5, 6),
MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "nonmetr_list_clust", false, "clusterType=reduct_clarans,centerQty=10", "dbScanFrac=0.1",
10 /* KNN-10 */, 0 /* no range search */ , 0.85, 0.95, 0.01, 5, 2, 7),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01),

// *************** NEW versions of permutation & projection-based filtering method tests ******************** //
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "seq_search", false, "", "",
Expand Down Expand Up @@ -140,8 +161,6 @@ vector<MethodTestCase> vTestCaseDesc = {
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", false, "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=1.0",

1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=8,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.999, 1.0, 0, 0.01, 0.99, 1.01),

// Binarized permutations
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", false, "numPivot=32", "dbScanFrac=1.0",
Expand All @@ -168,8 +187,6 @@ vector<MethodTestCase> vTestCaseDesc = {
1 /* KNN-1 */, 0 /* no range search */ , 0.8, 1.0, 0.1, 2, 3, 8),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "mi-file", false, "numPivot=16,numPivotIndex=16", "numPivotSearch=16,dbScanFrac=0.1",
1 /* KNN-1 */, 0 /* no range search */ , 0.95, 1.0, 0, 0.5, 8, 12),
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "napp", true, "numPivot=32,numPivotIndex=8,chunkIndexSize=102", "numPivotSearch=8",
1 /* KNN-1 */, 0 /* no range search */ , 0.6, 0.8, 2, 4.5, 22, 37),

// Binarized
MethodTestCase(DIST_TYPE_FLOAT, "l2", "final8_10K.txt", "perm_incsort_bin", false, "numPivot=32", "dbScanFrac=0.1",
Expand Down Expand Up @@ -322,16 +339,6 @@ vector<MethodTestCase> vTestCaseDesc = {
1 /* KNN-1 */, 0 /* no range search */ , 0.9, 1.0, 0, 1.0, 5, 12),
#endif

#ifdef WITH_EXTRAS
// *************** FALCONN test ***************************** //
#if (TEST_FALCONN)
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "falconn", false, "num_hash_tables=20,num_hash_bits=7,feature_hashing_dimension=128,use_falconn_dist=0", "num_probes=20",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.79, 0.5, 1.5, 5.75, 6.75),
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "falconn", false, "num_hash_tables=1,num_hash_bits=11,use_falconn_dist=0", "num_probes=1",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.4, 3.5, 4, 5.5),
#endif
#endif

#endif
};

Expand Down
5 changes: 5 additions & 0 deletions similarity_search/test/test_integr_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,11 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,

LOG(LIB_INFO) << ">>>> Index-time parameters: " << indexParams->ToString();

// Reset random number generator before each method's run.
// Above we reset it before random data split
defaultRandomSeed = 0; // Will affect any new threads
getThreadLocalRandomGenerator().seed(defaultRandomSeed); // Affects only the current thread

for (int testSetId = 0; testSetId < config.GetTestSetToRunQty(); ++testSetId) {
config.SelectTestSet(testSetId);

Expand Down

0 comments on commit 8a20700

Please sign in to comment.