Skip to content

Commit

Permalink
nearly full reproducibility with a given seed #57
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivairus committed Jan 7, 2018
1 parent b47b22a commit c409c1d
Show file tree
Hide file tree
Showing 17 changed files with 39 additions and 32 deletions.
2 changes: 1 addition & 1 deletion python_bindings/nmslib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ PYBIND11_PLUGIN(nmslib) {
py::module nmslibLogger = logging.attr("getLogger")("nmslib");
setGlobalLogger(new PythonLogger(nmslibLogger));

initLibrary(LIB_LOGCUSTOM, NULL);
initLibrary(0 /* seed */, LIB_LOGCUSTOM, NULL);

py::module m(module_name, "Bindings for Non-Metric Space Library (NMSLIB)");

Expand Down
6 changes: 1 addition & 5 deletions scripts/test_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,7 @@ function do_run {
# Methods that may create an index (at least for some spaces)
do_run 0 "napp" " -c numPivot=512,numPivotIndex=64 " 0 "-t numPivotSearch=40 -t numPivotSearch=42 -t numPivotSearch=44 -t numPivotSearch=46 -t numPivotSearch=48" "napp_${SPACE}.index"
do_run 1 "sw-graph" " -c NN=10 " 0 " -t efSearch=10 -t efSearch=20 -t efSearch=40 -t efSearch=80 -t efSearch=160 -t efSearch=240" "sw-graph_${SPACE}.index"
if [ "$SPACE" = "l2" -o "$SPACE" = "cosinesimil" ] ; then
do_run 1 "hnsw" " -c M=10 " 1 " -t efSearch=10 -t efSearch=20 -t efSearch=40 -t efSearch=80 -t efSearch=160 -t efSearch=240" "hnsw_${SPACE}.index"
else
do_run 1 "hnsw" " -c M=10 " 0 " -t efSearch=10 -t efSearch=20 -t efSearch=40 -t efSearch=80 -t efSearch=160 -t efSearch=240"
fi
do_run 1 "hnsw" " -c M=10 " 1 " -t efSearch=10 -t efSearch=20 -t efSearch=40 -t efSearch=80 -t efSearch=160 -t efSearch=240" "hnsw_${SPACE}.index"

# Methods that do not support creation of an index
do_run 1 "vptree" " -c tuneK=$K,bucketSize=50,desiredRecall=0.99,chunkBucket=1 0 " ""
Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/bench_distfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1971,7 +1971,7 @@ using namespace similarity;
int main(int argc, char* argv[]) {
string LogFile;
if (argc == 2) LogFile = argv[1];
initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

int nTest = 0;

Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/bench_projection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ int main(int argc, char *argv[]) {
LOG(LIB_FATAL) << "Failed to parse cmd arguments";
}

initLibrary(logFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, logFile.c_str());
initLibrary(0, logFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, logFile.c_str());

LOG(LIB_INFO) << "Program arguments are processed";

Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/knn_stat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ int main(int argc, char *argv[]) {
LOG(LIB_FATAL) << "Failed to parse cmd arguments";
}

initLibrary(logFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, logFile.c_str());
initLibrary(0, logFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, logFile.c_str());

LOG(LIB_INFO) << "Program arguments are processed";

Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ int main(int ac, char* av[]) {
"then you have to specify the gold-standard cache file!");
}

initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

LOG(LIB_INFO) << "Program arguments are processed";

Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/report_intr_dim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ int main(int argc, char* argv[]) {
THROW_RUNTIME_ERR(err);
}

initLibrary(LIB_LOGSTDERR);
initLibrary(0, LIB_LOGSTDERR);

if (DIST_TYPE_INT == distType) {
TestSpace<int>(
Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/test_clust.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ int main(int argc, char* argv[]) {
SampleDistQty
);

initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

ToLower(DistType);
ToLower(ClustType);
Expand Down
2 changes: 1 addition & 1 deletion similarity_search/apps/tune_vptree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ int main(int ac, char* av[]) {
IndexParams,
QueryTimeParams);

initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

ToLower(DistType);

Expand Down
2 changes: 1 addition & 1 deletion similarity_search/include/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

namespace similarity {

// NOTE(review): this listing appears to be a flattened diff hunk, so both the
// pre-change and the post-change declaration of initLibrary are shown below.
// Only the second one (with the leading `seed` parameter) seems to belong to
// the resulting header -- confirm against the repository.

// Pre-change declaration: no seed parameter.
void initLibrary(LogChoice choice = LIB_LOGNONE, const char*pLogFile = NULL);
// Post-change declaration.
//   seed     - value for the library's random seed; defaults to 0.
//   choice   - logging destination selector; defaults to LIB_LOGNONE.
//   pLogFile - log file name; callers in this commit pass it together with
//              LIB_LOGFILE. Defaults to NULL.
// Because `seed` is now the first positional parameter, existing callers that
// passed (choice, pLogFile) positionally have to be updated to pass a seed first.
void initLibrary(int seed = 0, LogChoice choice = LIB_LOGNONE, const char*pLogFile = NULL);
}

#endif
5 changes: 2 additions & 3 deletions similarity_search/include/method/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,8 +481,8 @@ namespace similarity {

// Draws a random level: takes a uniform random value u, computes
// -log(u) * revSize, and truncates the result to int.
//
//   revSize - multiplier applied to -log(u).
//   returns - the scaled value converted to int (truncation toward zero).
//
// NOTE(review): this listing appears to be a flattened diff hunk. The first
// pair of statements (using the per-object `generator`) looks like the
// pre-change side; the second pair (using RandomReal<float>()) looks like the
// post-change side. Both declare `r`, so only one pair can exist in the real file.
int getRandomLevel(double revSize)
{
// Pre-change side: double-precision draw from the member engine `generator`.
std::uniform_real_distribution<double> distribution(0.0, 1.0);
double r = -log(distribution(*generator)) * revSize;
// Post-change side: draw from the shared helper instead of a member engine.
// RandomReal is thread-safe
// NOTE(review): RandomReal is described in utils.h as returning values from
// 0 (inclusive); a draw of exactly 0 would make log() return -infinity and the
// int conversion below would then be undefined -- confirm whether this needs a guard.
// NOTE(review): the result is now stored as float rather than double.
float r = -log(RandomReal<float>()) * revSize;
return (int)r;
}

Expand Down Expand Up @@ -513,7 +513,6 @@ namespace similarity {

//
private:
std::unique_ptr<std::default_random_engine> generator;
size_t M_;
size_t maxM_;
size_t maxM0_;
Expand Down
29 changes: 20 additions & 9 deletions similarity_search/include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,32 @@ bool DoesFileExist(const char *filename);

inline bool DoesFileExist(const string &filename) { return DoesFileExist(filename.c_str()); }

// NOTE(review): this listing appears to be a flattened diff hunk. The first
// RandomInt header and the three `static` lines after it look like the
// pre-change side (one process-wide engine seeded from random_device); the
// rest, starting at `extern int randomSeed;`, looks like the post-change side.
inline int RandomInt() {
// Static is thread-safe in C++ 11
static random_device rdev;
static mt19937 gen(rdev());
static std::uniform_int_distribution<int> distr(0, std::numeric_limits<int>::max());
// Global seed used to construct the per-thread engines below.
extern int randomSeed;
// random 32-bit integer number
// Returns a value drawn by uniform_int_distribution<int32_t> over
// [0, numeric_limits<int32_t>::max()], both bounds inclusive.
inline int32_t RandomInt() {
/*
* Random number generation is thread safe when respective
* objects are not shared among threads. So, we will keep one
* random number generator per thread.
*/
// thread_local is static by default, but let's keep it static for clarity
// NOTE(review): every thread constructs its engine from the same randomSeed,
// so (if randomSeed is unchanged) each thread would produce the same sequence
// -- confirm this is intended.
// NOTE(review): the seed is read when a thread first calls this function;
// changing randomSeed afterwards does not reseed an already-constructed engine.
static thread_local mt19937 gen(randomSeed);
static thread_local std::uniform_int_distribution<int32_t> distr(0, std::numeric_limits<int32_t>::max());

return distr(gen);
}

// Returns a random real number of type T drawn by
// std::uniform_real_distribution<T>(0, 1).
//
// NOTE(review): this listing appears to be a flattened diff hunk. The three
// `static` lines right after the function header look like the pre-change side
// (one shared engine seeded from random_device); the `static thread_local`
// lines look like the post-change side. Both declare `gen` and `distr`, so
// only one set can exist in the real file.
template <class T>
// random real number from 0 (inclusive) to 1 (exclusive)
inline T RandomReal() {
// Static is thread-safe in C++ 11
static random_device rdev;
static mt19937 gen(rdev());
static std::uniform_real_distribution<T> distr(0, 1);
/*
* Random number generation is thread safe when respective
* objects are not shared among threads. So, we will keep one
* random number generator per thread.
*/
// thread_local is static by default, but let's keep it static for clarity
// NOTE(review): the engine is local to this function template, so each
// instantiation (and each thread) gets its own engine, all constructed from
// the same randomSeed -- the streams would start from identical engine state;
// confirm this is acceptable.
// NOTE(review): the seed is read when the engine is first constructed;
// changing randomSeed afterwards does not reseed an already-constructed engine.
static thread_local mt19937 gen(randomSeed);
static thread_local std::uniform_real_distribution<T> distr(0, 1);

return distr(gen);
}
Expand Down
6 changes: 5 additions & 1 deletion similarity_search/src/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@

namespace similarity {

void initLibrary(LogChoice choice, const char* pLogFile) {
int randomSeed = 0;

void initLibrary(int seed, LogChoice choice, const char* pLogFile) {
randomSeed = seed;

std::ios_base::sync_with_stdio(false);
InitializeLogger(choice, pLogFile);
initSpaces();
Expand Down
2 changes: 0 additions & 2 deletions similarity_search/src/method/hnsw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ namespace similarity {
{
AnyParamManager pmgr(IndexParams);

generator.reset(new std::default_random_engine(100));

pmgr.GetParamOptional("M", M_, 16);

// Let's use a generic algorithm by default!
Expand Down
1 change: 0 additions & 1 deletion similarity_search/src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@

namespace similarity {


const char* GetFileName(const char* fullpath) {
for (int i = strlen(fullpath) - 1; i >= 0; --i) {
if (fullpath[i] == '\\' || fullpath[i] == '/') {
Expand Down
2 changes: 1 addition & 1 deletion similarity_search/test/bunit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ int TestRunner::RunAllTests() {
int main(int argc, char *argv[]) {
std::string LogFile;
if (argc == 2) LogFile = argv[1];
similarity::initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
similarity::initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

return similarity::TestRunner::Instance().RunAllTests();
}
2 changes: 1 addition & 1 deletion similarity_search/test/test_integr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ int main(int ac, char* av[]) {
string LogFile;
if (ac == 2) LogFile = av[1];

initLibrary(LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());
initLibrary(0, LogFile.empty() ? LIB_LOGSTDERR:LIB_LOGFILE, LogFile.c_str());

WallClockTimer timer;
timer.reset();
Expand Down

0 comments on commit c409c1d

Please sign in to comment.