diff --git a/similarity_search/test/test_integr.cc b/similarity_search/test/test_integr.cc index 2d3407c..4af857a 100644 --- a/similarity_search/test/test_integr.cc +++ b/similarity_search/test/test_integr.cc @@ -280,7 +280,7 @@ vector vTestCaseDesc = { MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "", 10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.0, 5.5, 8), MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=10", - 1 /* KNN-1 */, 0 /* no range search */ , 0.75, 0.85, 0.3, 1.5, 48, 52), + 1 /* KNN-1 */, 0 /* no range search */ , 0.75, 0.85, 0.3, 1.6, 48, 52), MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=20", 10 /* KNN-10 */, 0 /* no range search */ , 0.7, 0.78, 0.3, 1.6, 28, 37), // range @@ -326,7 +326,7 @@ vector vTestCaseDesc = { MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "falconn", false, "num_hash_tables=20,num_hash_bits=7,feature_hashing_dimension=128,use_falconn_dist=0", "num_probes=20", 1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.79, 0.5, 1.5, 5.75, 6.75), MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "falconn", false, "num_hash_tables=1,num_hash_bits=11,use_falconn_dist=0", "num_probes=1", - 1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.75, 3.5, 4, 5.5), + 1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.4, 3.5, 4, 5.5), #endif #endif }; diff --git a/similarity_search/test/test_integr_util.h b/similarity_search/test/test_integr_util.h index cfbca32..b8accb4 100644 --- a/similarity_search/test/test_integr_util.h +++ b/similarity_search/test/test_integr_util.h @@ -270,6 +270,11 @@ size_t RunTestExper(const vector& vTestCases, const string& RangeArg ) { + // For better reproducibility, let's reset + // random number generators. + defaultRandomSeed = 0; // Will affect any new threads + resetRandomGenerator(defaultRandomSeed); // Affects only the current thread + vector knn; vector range; @@ -323,55 +328,94 @@ size_t RunTestExper(const vector& vTestCases, std::vector MethParams; vector MemUsage; - vector> ExpResRange(config.GetRange().size(), - vector(vTestCases.size())); - vector> ExpResKNN(config.GetKNN().size(), - vector(vTestCases.size())); + vector>> vManagerGS(config.GetTestSetToRunQty()); + + for (int testSetId = 0; testSetId < config.GetTestSetToRunQty(); ++testSetId) { + config.SelectTestSet(testSetId); + + LOG(LIB_INFO) << ">>>> Computing GS for test set id: " << testSetId << " (set qty: " << config.GetTestSetToRunQty() << ")"; + + vManagerGS[testSetId].reset(new GoldStandardManager(config)); + vManagerGS[testSetId]->Compute(ThreadTestQty, 0); // Keeping all GS entries, should be Ok here because our data sets are smallish + } + + for (size_t methNum = 0; methNum < vTestCases.size(); ++methNum) { + const string& methodName = vTestCases[methNum].methodName_; + bool recallOnly = vTestCases[methNum].recallOnly_; + + cout << "Testing: " << yellow << methodName << no_color << endl; + LOG(LIB_INFO) << ">>>> Index type : " << methodName; + + vector> expResRange(config.GetRange().size(), + vector(1)); + vector> expResKNN(config.GetKNN().size(), + vector(1)); + + vector vCmdStrRange(expResRange.size()); + vector vCmdStrKNN(expResKNN.size()); + + cout << yellow << "Command lines:" << no_color << endl; - for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) { for (size_t i = 0; i < config.GetRange().size(); ++i) { - ExpResRange[i][MethNum] = new MetaAnalysis(config.GetTestSetToRunQty()); - } - for (size_t i = 0; i < config.GetKNN().size(); ++i) { - ExpResKNN[i][MethNum] = new MetaAnalysis(config.GetTestSetToRunQty()); + expResRange[i][0] = new MetaAnalysis(config.GetTestSetToRunQty()); + + vCmdStrRange[i] = CreateCmdStr(vTestCases[methNum], + true, + ConvertToString(config.GetRange()[i]), + DistType, + SpaceTypeStr, + ThreadTestQty, + TestSetQty, + DataFile, + QueryFile, + MaxNumData, + MaxNumQuery, + eps); + cout << vCmdStrRange[i] << endl; + LOG(LIB_INFO) << "Command line params: " << vCmdStrRange[i]; } - } - // For better reproducibility, let's reset - // random number generators. - defaultRandomSeed = 0; // Will affect any new threads - resetRandomGenerator(defaultRandomSeed); // Affects only the current thread + for (size_t i = 0; i < config.GetKNN().size(); ++i) { + expResKNN[i][0] = new MetaAnalysis(config.GetTestSetToRunQty()); - for (int TestSetId = 0; TestSetId < config.GetTestSetToRunQty(); ++TestSetId) { - config.SelectTestSet(TestSetId); + vCmdStrKNN[i] = CreateCmdStr(vTestCases[methNum], + false, + ConvertToString(config.GetKNN()[i]), + DistType, + SpaceTypeStr, + ThreadTestQty, + TestSetQty, + DataFile, + QueryFile, + MaxNumData, + MaxNumQuery, + eps); + cout << vCmdStrKNN[i] << endl; + } - LOG(LIB_INFO) << ">>>> Test set id: " << TestSetId << " (set qty: " << config.GetTestSetToRunQty() << ")"; + shared_ptr indexParams; + vector> vQueryTimeParams; - GoldStandardManager managerGS(config); - managerGS.Compute(ThreadTestQty, 0); // Keeping all GS entries, should be Ok here because our data sets are smallish - - for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) { - const string& MethodName = vTestCases[MethNum].methodName_; + { + vector desc; + ParseArg(vTestCases[methNum].indexParams_, desc); + indexParams = shared_ptr(new AnyParams(desc)); + } - shared_ptr IndexParams; - vector> vQueryTimeParams; + { + vector desc; + ParseArg(vTestCases[methNum].queryTypeParams_, desc); + vQueryTimeParams.push_back(shared_ptr(new AnyParams(desc))); + } - bool recallOnly = vTestCases[MethNum].recallOnly_; + LOG(LIB_INFO) << ">>>> Index-time parameters: " << indexParams->ToString(); - { - vector desc; - ParseArg(vTestCases[MethNum].indexParams_, desc); - IndexParams = shared_ptr(new AnyParams(desc)); - } + for (int testSetId = 0; testSetId < config.GetTestSetToRunQty(); ++testSetId) { + config.SelectTestSet(testSetId); - { - vector desc; - ParseArg(vTestCases[MethNum].queryTypeParams_, desc); - vQueryTimeParams.push_back(shared_ptr(new AnyParams(desc))); - } + LOG(LIB_INFO) << ">>>> Test set id: " << testSetId << " (set qty: " << config.GetTestSetToRunQty() << ")"; - LOG(LIB_INFO) << ">>>> Index type : " << MethodName; - LOG(LIB_INFO) << ">>>> Index-time parameters: " << IndexParams->ToString(); + const GoldStandardManager& managerGS = *vManagerGS[testSetId]; const double vmsize_before = mem_usage_measure.get_vmsize(); @@ -379,23 +423,22 @@ size_t RunTestExper(const vector& vTestCases, wtm.reset(); - LOG(LIB_INFO) << "Creating a new index" ; shared_ptr> IndexPtr( MethodFactoryRegistry::Instance(). CreateMethod(false /* don't print progress */, - MethodName, + methodName, SpaceType, config.GetSpace(), config.GetDataObjects()) ); - IndexPtr->CreateIndex(*IndexParams); + IndexPtr->CreateIndex(*indexParams); if (bTestReload) { LOG(LIB_INFO) << "Saving the index" ; - string indexLocAdd = "_" + ConvertToString(TestSetId); + string indexLocAdd = "_" + ConvertToString(testSetId); string fullIndexName = IndexFileNamePrefix + indexLocAdd; if (DoesFileExist(fullIndexName)) { @@ -408,7 +451,7 @@ size_t RunTestExper(const vector& vTestCases, IndexPtr.reset( MethodFactoryRegistry::Instance(). CreateMethod(false /* don't print progress */, - MethodName, + methodName, SpaceType, config.GetSpace(), config.GetDataObjects()) ); @@ -433,67 +476,33 @@ size_t RunTestExper(const vector& vTestCases, LOG(LIB_INFO) << ">>>> Data size: " << data_size << " MBs"; LOG(LIB_INFO) << ">>>> Time elapsed: " << (wtm.elapsed()/double(1e6)) << " sec"; - /* - * We need to repackage MetaAnalysis arrays: - * RunAll will deal with only a single method and - * a single set of query-time parameters. - */ - - vector> ExpResRangeTmp(config.GetRange().size(), vector(1)); - vector> ExpResKNNTmp(config.GetKNN().size(), vector(1)); - - - for (size_t i = 0; i < config.GetRange().size(); ++i) { - MetaAnalysis* res = ExpResRange[i][MethNum]; - res->SetMem(TestSetId, TotalMemByMethod); - ExpResRangeTmp[i][0] = res; - } - for (size_t i = 0; i < config.GetKNN().size(); ++i) { - MetaAnalysis* res = ExpResKNN[i][MethNum]; - res->SetMem(TestSetId, TotalMemByMethod); - ExpResKNNTmp[i][0] = res; - } - CHECK_MSG(vQueryTimeParams.size() == 1, "Test integration code is currently can execute only one set of query-time parameters!"); Experiments::RunAll(true /* print progress */, ThreadTestQty, - TestSetId, + testSetId, managerGS, recallOnly, - ExpResRangeTmp, ExpResKNNTmp, + expResRange, expResKNN, config, *IndexPtr, vQueryTimeParams); - } + } - } + cout << yellow << "One test complete." << no_color << endl; - for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) { string Print, Data, Header; for (size_t i = 0; i < config.GetRange().size(); ++i) { - MetaAnalysis* res = ExpResRange[i][MethNum]; + MetaAnalysis* res = expResRange[i][0]; - string cmdStr = CreateCmdStr(vTestCases[MethNum], - true, - ConvertToString(config.GetRange()[i]), - DistType, - SpaceTypeStr, - ThreadTestQty, - TestSetQty, - DataFile, - QueryFile, - MaxNumData, - MaxNumQuery, - eps); + string cmdStr = vCmdStrRange[i]; cout << cmdStr << endl; - LOG(LIB_INFO) << "Command line params: " << cmdStr; - if (!ProcessAndCheckResults(cmdStr, + if (!ProcessAndCheckResults(cmdStr, DistType, SpaceType, - vTestCases[MethNum], config, *res, Print)) { + vTestCases[methNum], config, *res, Print)) { nFail++; cout << red << "failed" << no_color << " (see logs for more details) " << endl; } else { @@ -507,26 +516,14 @@ size_t RunTestExper(const vector& vTestCases, } for (size_t i = 0; i < config.GetKNN().size(); ++i) { - MetaAnalysis* res = ExpResKNN[i][MethNum]; - - string cmdStr = CreateCmdStr(vTestCases[MethNum], - false, - ConvertToString(config.GetKNN()[i]), - DistType, - SpaceTypeStr, - ThreadTestQty, - TestSetQty, - DataFile, - QueryFile, - MaxNumData, - MaxNumQuery, - eps); + MetaAnalysis* res = expResKNN[i][0]; + string cmdStr = vCmdStrKNN[i]; cout << cmdStr << endl; - LOG(LIB_INFO) << "Command line params: " << cmdStr; + if (!ProcessAndCheckResults(cmdStr, DistType, SpaceType, - vTestCases[MethNum], config, *res, Print)) { + vTestCases[methNum], config, *res, Print)) { cout << red << "failed" << no_color << " (see logs for more details) " << endl; nFail++; } else {