Skip to content

Commit

Permalink
Restructuring integration tests to improve reproducibility and ease of debugging #57
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivairus committed Feb 2, 2018
1 parent dd4978b commit 15a2b7d
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 102 deletions.
4 changes: 2 additions & 2 deletions similarity_search/test/test_integr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ vector<MethodTestCase> vTestCaseDesc = {
MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "",
10 /* KNN-10 */, 0 /* no range search */ , 0.999, 1.0, 0.0, 0.0, 5.5, 8),
MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=10",
1 /* KNN-1 */, 0 /* no range search */ , 0.75, 0.85, 0.3, 1.5, 48, 52),
1 /* KNN-1 */, 0 /* no range search */ , 0.75, 0.85, 0.3, 1.6, 48, 52),
MethodTestCase(DIST_TYPE_FLOAT, "kldivgenfast", "final8_10K.txt", "bbtree", false, "bucketSize=10", "maxLeavesToVisit=20",
10 /* KNN-10 */, 0 /* no range search */ , 0.7, 0.78, 0.3, 1.6, 28, 37),
// range
Expand Down Expand Up @@ -326,7 +326,7 @@ vector<MethodTestCase> vTestCaseDesc = {
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil_sparse_fast", "sparse_5K.txt", "falconn", false, "num_hash_tables=20,num_hash_bits=7,feature_hashing_dimension=128,use_falconn_dist=0", "num_probes=20",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.79, 0.5, 1.5, 5.75, 6.75),
MethodTestCase(DIST_TYPE_FLOAT, "cosinesimil", "final8_10K.txt", "falconn", false, "num_hash_tables=1,num_hash_bits=11,use_falconn_dist=0", "num_probes=1",
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.75, 3.5, 4, 5.5),
1 /* KNN-1 */, 0 /* no range search */ , 0.65, 0.75, 2.4, 3.5, 4, 5.5),
#endif
#endif
};
Expand Down
197 changes: 97 additions & 100 deletions similarity_search/test/test_integr_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,
const string& RangeArg
)
{
// For better reproducibility, let's reset
// random number generators.
defaultRandomSeed = 0; // Will affect any new threads
resetRandomGenerator(defaultRandomSeed); // Affects only the current thread

vector<unsigned> knn;
vector<dist_t> range;

Expand Down Expand Up @@ -323,79 +328,117 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,
std::vector<std::string> MethParams;
vector<double> MemUsage;

vector<vector<MetaAnalysis*>> ExpResRange(config.GetRange().size(),
vector<MetaAnalysis*>(vTestCases.size()));
vector<vector<MetaAnalysis*>> ExpResKNN(config.GetKNN().size(),
vector<MetaAnalysis*>(vTestCases.size()));
vector<unique_ptr<GoldStandardManager<dist_t>>> vManagerGS(config.GetTestSetToRunQty());

for (int testSetId = 0; testSetId < config.GetTestSetToRunQty(); ++testSetId) {
config.SelectTestSet(testSetId);

LOG(LIB_INFO) << ">>>> Computing GS for test set id: " << testSetId << " (set qty: " << config.GetTestSetToRunQty() << ")";

vManagerGS[testSetId].reset(new GoldStandardManager<dist_t>(config));
vManagerGS[testSetId]->Compute(ThreadTestQty, 0); // Keeping all GS entries, should be Ok here because our data sets are smallish
}

for (size_t methNum = 0; methNum < vTestCases.size(); ++methNum) {
const string& methodName = vTestCases[methNum].methodName_;
bool recallOnly = vTestCases[methNum].recallOnly_;

cout << "Testing: " << yellow << methodName << no_color << endl;
LOG(LIB_INFO) << ">>>> Index type : " << methodName;

vector<vector<MetaAnalysis*>> expResRange(config.GetRange().size(),
vector<MetaAnalysis*>(1));
vector<vector<MetaAnalysis*>> expResKNN(config.GetKNN().size(),
vector<MetaAnalysis*>(1));

vector<string> vCmdStrRange(expResRange.size());
vector<string> vCmdStrKNN(expResKNN.size());

cout << yellow << "Command lines:" << no_color << endl;

for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) {
for (size_t i = 0; i < config.GetRange().size(); ++i) {
ExpResRange[i][MethNum] = new MetaAnalysis(config.GetTestSetToRunQty());
}
for (size_t i = 0; i < config.GetKNN().size(); ++i) {
ExpResKNN[i][MethNum] = new MetaAnalysis(config.GetTestSetToRunQty());
expResRange[i][0] = new MetaAnalysis(config.GetTestSetToRunQty());

vCmdStrRange[i] = CreateCmdStr(vTestCases[methNum],
true,
ConvertToString(config.GetRange()[i]),
DistType,
SpaceTypeStr,
ThreadTestQty,
TestSetQty,
DataFile,
QueryFile,
MaxNumData,
MaxNumQuery,
eps);
cout << vCmdStrRange[i] << endl;
LOG(LIB_INFO) << "Command line params: " << vCmdStrRange[i];
}
}

// For better reproducibility, let's reset
// random number generators.
defaultRandomSeed = 0; // Will affect any new threads
resetRandomGenerator(defaultRandomSeed); // Affects only the current thread
for (size_t i = 0; i < config.GetKNN().size(); ++i) {
expResKNN[i][0] = new MetaAnalysis(config.GetTestSetToRunQty());

for (int TestSetId = 0; TestSetId < config.GetTestSetToRunQty(); ++TestSetId) {
config.SelectTestSet(TestSetId);
vCmdStrKNN[i] = CreateCmdStr(vTestCases[methNum],
false,
ConvertToString(config.GetKNN()[i]),
DistType,
SpaceTypeStr,
ThreadTestQty,
TestSetQty,
DataFile,
QueryFile,
MaxNumData,
MaxNumQuery,
eps);
cout << vCmdStrKNN[i] << endl;
}

LOG(LIB_INFO) << ">>>> Test set id: " << TestSetId << " (set qty: " << config.GetTestSetToRunQty() << ")";
shared_ptr<AnyParams> indexParams;
vector<shared_ptr<AnyParams>> vQueryTimeParams;

GoldStandardManager<dist_t> managerGS(config);
managerGS.Compute(ThreadTestQty, 0); // Keeping all GS entries, should be Ok here because our data sets are smallish

for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) {
const string& MethodName = vTestCases[MethNum].methodName_;
{
vector<string> desc;
ParseArg(vTestCases[methNum].indexParams_, desc);
indexParams = shared_ptr<AnyParams>(new AnyParams(desc));
}

shared_ptr<AnyParams> IndexParams;
vector<shared_ptr<AnyParams>> vQueryTimeParams;
{
vector<string> desc;
ParseArg(vTestCases[methNum].queryTypeParams_, desc);
vQueryTimeParams.push_back(shared_ptr<AnyParams>(new AnyParams(desc)));
}

bool recallOnly = vTestCases[MethNum].recallOnly_;
LOG(LIB_INFO) << ">>>> Index-time parameters: " << indexParams->ToString();

{
vector<string> desc;
ParseArg(vTestCases[MethNum].indexParams_, desc);
IndexParams = shared_ptr<AnyParams>(new AnyParams(desc));
}
for (int testSetId = 0; testSetId < config.GetTestSetToRunQty(); ++testSetId) {
config.SelectTestSet(testSetId);

{
vector<string> desc;
ParseArg(vTestCases[MethNum].queryTypeParams_, desc);
vQueryTimeParams.push_back(shared_ptr<AnyParams>(new AnyParams(desc)));
}
LOG(LIB_INFO) << ">>>> Test set id: " << testSetId << " (set qty: " << config.GetTestSetToRunQty() << ")";

LOG(LIB_INFO) << ">>>> Index type : " << MethodName;
LOG(LIB_INFO) << ">>>> Index-time parameters: " << IndexParams->ToString();
const GoldStandardManager<dist_t>& managerGS = *vManagerGS[testSetId];

const double vmsize_before = mem_usage_measure.get_vmsize();

WallClockTimer wtm;

wtm.reset();


LOG(LIB_INFO) << "Creating a new index" ;

shared_ptr<Index<dist_t>> IndexPtr(
MethodFactoryRegistry<dist_t>::Instance().
CreateMethod(false /* don't print progress */,
MethodName,
methodName,
SpaceType, config.GetSpace(),
config.GetDataObjects())
);

IndexPtr->CreateIndex(*IndexParams);
IndexPtr->CreateIndex(*indexParams);

if (bTestReload) {
LOG(LIB_INFO) << "Saving the index" ;

string indexLocAdd = "_" + ConvertToString(TestSetId);
string indexLocAdd = "_" + ConvertToString(testSetId);
string fullIndexName = IndexFileNamePrefix + indexLocAdd;

if (DoesFileExist(fullIndexName)) {
Expand All @@ -408,7 +451,7 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,
IndexPtr.reset(
MethodFactoryRegistry<dist_t>::Instance().
CreateMethod(false /* don't print progress */,
MethodName,
methodName,
SpaceType, config.GetSpace(),
config.GetDataObjects())
);
Expand All @@ -433,67 +476,33 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,
LOG(LIB_INFO) << ">>>> Data size: " << data_size << " MBs";
LOG(LIB_INFO) << ">>>> Time elapsed: " << (wtm.elapsed()/double(1e6)) << " sec";

/*
* We need to repackage MetaAnalysis arrays:
* RunAll will deal with only a single method and
* a single set of query-time parameters.
*/

vector<vector<MetaAnalysis*>> ExpResRangeTmp(config.GetRange().size(), vector<MetaAnalysis*>(1));
vector<vector<MetaAnalysis*>> ExpResKNNTmp(config.GetKNN().size(), vector<MetaAnalysis*>(1));


for (size_t i = 0; i < config.GetRange().size(); ++i) {
MetaAnalysis* res = ExpResRange[i][MethNum];
res->SetMem(TestSetId, TotalMemByMethod);
ExpResRangeTmp[i][0] = res;
}
for (size_t i = 0; i < config.GetKNN().size(); ++i) {
MetaAnalysis* res = ExpResKNN[i][MethNum];
res->SetMem(TestSetId, TotalMemByMethod);
ExpResKNNTmp[i][0] = res;
}

CHECK_MSG(vQueryTimeParams.size() == 1,
"Test integration code is currently can execute only one set of query-time parameters!");

Experiments<dist_t>::RunAll(true /* print progress */,
ThreadTestQty,
TestSetId,
testSetId,
managerGS,
recallOnly,
ExpResRangeTmp, ExpResKNNTmp,
expResRange, expResKNN,
config,
*IndexPtr,
vQueryTimeParams);

}
}

}
cout << yellow << "One test complete." << no_color << endl;

for (size_t MethNum = 0; MethNum < vTestCases.size(); ++MethNum) {
string Print, Data, Header;

for (size_t i = 0; i < config.GetRange().size(); ++i) {
MetaAnalysis* res = ExpResRange[i][MethNum];
MetaAnalysis* res = expResRange[i][0];

string cmdStr = CreateCmdStr(vTestCases[MethNum],
true,
ConvertToString(config.GetRange()[i]),
DistType,
SpaceTypeStr,
ThreadTestQty,
TestSetQty,
DataFile,
QueryFile,
MaxNumData,
MaxNumQuery,
eps);
string cmdStr = vCmdStrRange[i];
cout << cmdStr << endl;
LOG(LIB_INFO) << "Command line params: " << cmdStr;
if (!ProcessAndCheckResults(cmdStr,
if (!ProcessAndCheckResults(cmdStr,
DistType, SpaceType,
vTestCases[MethNum], config, *res, Print)) {
vTestCases[methNum], config, *res, Print)) {
nFail++;
cout << red << "failed" << no_color << " (see logs for more details) " << endl;
} else {
Expand All @@ -507,26 +516,14 @@ size_t RunTestExper(const vector<MethodTestCase>& vTestCases,
}

for (size_t i = 0; i < config.GetKNN().size(); ++i) {
MetaAnalysis* res = ExpResKNN[i][MethNum];

string cmdStr = CreateCmdStr(vTestCases[MethNum],
false,
ConvertToString(config.GetKNN()[i]),
DistType,
SpaceTypeStr,
ThreadTestQty,
TestSetQty,
DataFile,
QueryFile,
MaxNumData,
MaxNumQuery,
eps);
MetaAnalysis* res = expResKNN[i][0];

string cmdStr = vCmdStrKNN[i];
cout << cmdStr << endl;
LOG(LIB_INFO) << "Command line params: " << cmdStr;

if (!ProcessAndCheckResults(cmdStr,
DistType, SpaceType,
vTestCases[MethNum], config, *res, Print)) {
vTestCases[methNum], config, *res, Print)) {
cout << red << "failed" << no_color << " (see logs for more details) " << endl;
nFail++;
} else {
Expand Down

0 comments on commit 15a2b7d

Please sign in to comment.