From dd4978b499c498fffbfeb5d60e831927bfe650c7 Mon Sep 17 00:00:00 2001 From: searchivairus Date: Thu, 1 Feb 2018 00:16:38 -0500 Subject: [PATCH] Adding random seed reset to improve stability of tests --- similarity_search/apps/tune_vptree.cc | 2 +- similarity_search/include/utils.h | 35 ++++++++++++++++------- similarity_search/src/init.cc | 6 ++-- similarity_search/src/randproj_util.cc | 2 +- similarity_search/src/searchoracle.cc | 2 +- similarity_search/test/test_integr_util.h | 4 +++ 6 files changed, 35 insertions(+), 16 deletions(-) diff --git a/similarity_search/apps/tune_vptree.cc b/similarity_search/apps/tune_vptree.cc index b5d2da9..bc6a22b 100644 --- a/similarity_search/apps/tune_vptree.cc +++ b/similarity_search/apps/tune_vptree.cc @@ -113,7 +113,7 @@ void RunExper(unsigned AddRestartQty, LOG(LIB_INFO) << "We are going to tune parameters for " << MethodName; - static thread_local auto& engine(GET_RANDOM_GENERATOR); + static thread_local auto& engine(getRandomGenerator()); static std::normal_distribution<> normGen(0.0f, log(FullFactor)); AnyParamManager pmgr(IndexParams); diff --git a/similarity_search/include/utils.h b/similarity_search/include/utils.h index 7cfd6b6..0fa279c 100644 --- a/similarity_search/include/utils.h +++ b/similarity_search/include/utils.h @@ -33,6 +33,7 @@ #include #include #include +#include #include "idtype.h" @@ -65,7 +66,6 @@ typedef SSIZE_T ssize_t; #define FIELD_DELIMITER ':' -#define GET_RANDOM_GENERATOR getRandomGenerator() namespace similarity { @@ -75,6 +75,8 @@ using std::stringstream; using namespace std; +typedef std::mt19937 RandomGeneratorType; + const char* GetFileName(const char* fullpath); @@ -84,18 +86,29 @@ bool DoesFileExist(const char *filename); inline bool DoesFileExist(const string &filename) { return DoesFileExist(filename.c_str()); } -extern int randomSeed; /* - * Random number generation is thread safe when respective - * objects are not shared among threads. 
So, we will keep one - * random number generator per thread. + * 1. Random number generation is thread safe when respective + * objects are not shared among threads. So, we will keep one + * random number generator per thread. + * 2. There is a default seed to initialize all random generators. + * 3. However, sometimes we may want to reset the random number generator + * within a working thread (i.e., this would be only a thread-specific change). + * In particular, this is needed to improve reproducibility of integration tests. */ -template -inline RandGenType & getRandomGenerator() { - static thread_local RandGenType gen(randomSeed); +extern int defaultRandomSeed; +extern thread_local std::unique_ptr<RandomGeneratorType> randomGen; + +inline void resetRandomGenerator(int newRandomSeed) { + randomGen.reset(new RandomGeneratorType(newRandomSeed)); +} + +inline RandomGeneratorType& getRandomGenerator() { + if (!randomGen) { + resetRandomGenerator(defaultRandomSeed); + } - return gen; + return *randomGen; } // random 32-bit integer number @@ -108,7 +121,7 @@ inline int32_t RandomInt() { // thread_local is static by default, but let's keep it static for clarity static thread_local std::uniform_int_distribution distr(0, std::numeric_limits::max()); - return distr(GET_RANDOM_GENERATOR); + return distr(getRandomGenerator()); } template @@ -122,7 +135,7 @@ inline T RandomReal() { // thread_local is static by default, but let's keep it static for clarity static thread_local std::uniform_real_distribution distr(0, 1); - return distr(GET_RANDOM_GENERATOR); + return distr(getRandomGenerator()); } void RStrip(char* str); diff --git a/similarity_search/src/init.cc b/similarity_search/src/init.cc index 6acf3fe..e25f8ab 100644 --- a/similarity_search/src/init.cc +++ b/similarity_search/src/init.cc @@ -29,13 +29,15 @@ #include "logging.h" #include +#include namespace similarity { -int randomSeed = 0; +int defaultRandomSeed = 0; +thread_local std::unique_ptr<RandomGeneratorType> randomGen; void initLibrary(int seed, 
LogChoice choice, const char* pLogFile) { - randomSeed = seed; + defaultRandomSeed = seed; std::ios_base::sync_with_stdio(false); InitializeLogger(choice, pLogFile); diff --git a/similarity_search/src/randproj_util.cc b/similarity_search/src/randproj_util.cc index a01dcf7..7d4f91b 100644 --- a/similarity_search/src/randproj_util.cc +++ b/similarity_search/src/randproj_util.cc @@ -33,7 +33,7 @@ template void initRandProj(size_t nSrcDim, size_t nDstDim, bool bDoOrth, vector>& projMatr) { // Static is thread-safe in C++-11 - static thread_local auto& randGen(GET_RANDOM_GENERATOR); + static thread_local auto& randGen(getRandomGenerator()); static std::normal_distribution<> normGen(0.0f, 1.0f); // 1. Create normally distributed vectors diff --git a/similarity_search/src/searchoracle.cc b/similarity_search/src/searchoracle.cc index 4dd4435..6c72fb4 100644 --- a/similarity_search/src/searchoracle.cc +++ b/similarity_search/src/searchoracle.cc @@ -206,7 +206,7 @@ void PolynomialPruner::SetIndexTimeParams(AnyParamManager& pmgr) { unsigned exp_left = 0, exp_right = 0; - static thread_local auto& randGen(GET_RANDOM_GENERATOR); + static thread_local auto& randGen(getRandomGenerator()); static std::normal_distribution<> normGen(0.0f, log(fullFactor)); diff --git a/similarity_search/test/test_integr_util.h b/similarity_search/test/test_integr_util.h index d2b57a8..cfbca32 100644 --- a/similarity_search/test/test_integr_util.h +++ b/similarity_search/test/test_integr_util.h @@ -337,6 +337,10 @@ size_t RunTestExper(const vector& vTestCases, } } + // For better reproducibility, let's reset + // random number generators. + defaultRandomSeed = 0; // Will affect any new threads + resetRandomGenerator(defaultRandomSeed); // Affects only the current thread for (int TestSetId = 0; TestSetId < config.GetTestSetToRunQty(); ++TestSetId) { config.SelectTestSet(TestSetId);