diff --git a/python_bindings/setup.py b/python_bindings/setup.py index c01800e..88930a9 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -21,16 +21,14 @@ if os.path.exists(library_file): # if we have a prebuilt nmslib library file, use that. extra_objects.append(library_file) - print("Found: " + os.path.abspath(library_file)) else: - raise RuntimeError("can't find prebuild lib: " + os.path.abspath(library_file)) - # # Otherwise build all the files here directly (excluding extras which need eigen/boost) - # exclude_files = set("""bbtree.cc lsh.cc lsh_multiprobe.cc lsh_space.cc falconn.cc nndes.cc space_sqfd.cc - # dummy_app.cc main.cc""".split()) - # - # for root, subdirs, files in os.walk(os.path.join(libdir, "src")): - # source_files.extend(os.path.join(root, f) for f in files - # if f.endswith(".cc") and f not in exclude_files) + # Otherwise build all the files here directly (excluding extras which need eigen/boost) + exclude_files = set("""bbtree.cc lsh.cc lsh_multiprobe.cc lsh_space.cc falconn.cc nndes.cc space_sqfd.cc + dummy_app.cc main.cc""".split()) + + for root, subdirs, files in os.walk(os.path.join(libdir, "src")): + source_files.extend(os.path.join(root, f) for f in files + if f.endswith(".cc") and f not in exclude_files) if sys.platform.startswith('linux'): diff --git a/python_bindings/tests/bindings_test.py b/python_bindings/tests/bindings_test.py index 4c78875..1e3ec87 100644 --- a/python_bindings/tests/bindings_test.py +++ b/python_bindings/tests/bindings_test.py @@ -6,125 +6,6 @@ import numpy.testing as npt import nmslib -import psutil -import logging -import multiprocessing -import time -import os -import threading - -MB = 1024 * 1024 - - -class StoppableThread(threading.Thread): - """Thread class with a stop() method. The thread itself has to check - regularly for the stopped() condition.""" - - def __init__(self, *args, **kwargs): - super().__init__() - self._stop_event = threading.Event() - - def stop(self): - self._stop_event.set() - - def stopped(self): - return self._stop_event.is_set() - - -class Timer: - """ Context manager for timing named blocks of code """ - def __init__(self, name, logger=None): - self.name = name - self.logger = logger if logger else logging.getLogger() - - def __enter__(self): - self.start = time.time() - self.logger.debug("Starting {}".format(self.name)) - - def __exit__(self, type, value, trace): - self.logger.info("{}: {:0.2f}s".format(self.name, time.time() - self.start)) - - -class PeakMemoryUsage: - class Worker(StoppableThread): - def __init__(self, interval, *args, **kwargs): - super().__init__(*args, **kwargs) - self.interval = interval - self.max_rss = self.max_vms = 0 - - def run(self): - process = psutil.Process() - while not self.stopped(): - mem = process.memory_info() - self.max_rss = max(self.max_rss, mem.rss) - self.max_vms = max(self.max_vms, mem.vms) - time.sleep(self.interval) - - """ Context manager to calculate peak memory usage in a statement block """ - def __init__(self, name, logger=None, interval=1): - self.name = name - self.logger = logger if logger else logging.getLogger() - self.interval = interval - self.start = time.time() - self.worker = None - - def __enter__(self): - if self.interval > 0: - pid = os.getpid() - mem = psutil.Process(pid).memory_info() - self.start_rss, self.start_vms = mem.rss, mem.vms - - self.worker = PeakMemoryUsage.Worker(self.interval) - self.worker.start() - return self - - def __exit__(self, _, value, trace): - if self.worker: - self.worker.stop() - self.worker.join() - self.logger.warning("Peak memory usage for '{}' in MBs: orig=(rss={:0.1f} vms={:0.1f}) " - "peak=(rss={:0.1f} vms={:0.1f}) in {:0.2f}s" - .format(self.name, self.start_rss / MB, self.start_vms / MB, - self.worker.max_rss / MB, - self.worker.max_vms / MB, time.time() - self.start)) - - -class PsUtil(object): - def __init__(self, attr=('virtual_memory',), proc_attr=None, - logger=None, interval=60): - """ attr can be multiple methods of psutil (e.g. attr=['virtual_memory', 'cpu_times_percent']) """ - self.ps_mon = None - self.attr = attr - self.proc_attr = proc_attr - self.logger = logger if logger else logging.getLogger() - self.interval = interval - - def psutil_worker(self, pid): - root_proc = psutil.Process(pid) - while True: - for attr in self.attr: - self.logger.warning("PSUTIL {}".format(getattr(psutil, attr)())) - if self.proc_attr: - procs = set(root_proc.children(recursive=True)) - procs.add(root_proc) - procs = sorted(procs, key=lambda p: p.pid) - - for proc in procs: - self.logger.warning("PSUTIL process={}: {}" - .format(proc.pid, proc.as_dict(self.proc_attr))) - - time.sleep(self.interval) - - def __enter__(self): - if self.interval > 0: - self.ps_mon = multiprocessing.Process(target=self.psutil_worker, args=(os.getpid(),)) - self.ps_mon.start() - time.sleep(1) # sleep so the first iteration doesn't include statements in the PsUtil context - return self - - def __exit__(self, type, value, trace): - if self.ps_mon is not None: - self.ps_mon.terminate() def get_exact_cosine(row, data, N=10): diff --git a/similarity_search/test/test_distfunc.cc b/similarity_search/test/test_distfunc.cc index 668f3d4..252520d 100644 --- a/similarity_search/test/test_distfunc.cc +++ b/similarity_search/test/test_distfunc.cc @@ -63,1189 +63,1189 @@ TEST(set_intel) { */ -//TEST(Platform64) { -// EXPECT_EQ(8 == sizeof(size_t), true); -//} -// -//template -//bool checkElemVectEq(const vector>& source, -// const vector>& target) { -// if (source.size() != target.size()) return false; -// -// for (size_t i = 0; i < source.size(); ++i) -// if (source[i] != target[i]) return false; -// -// return true; -//} -// -//template -//void TestSparsePackUnpack() { -// for (size_t maxSize = 1024 ; maxSize < 1024*1024; maxSize += 8192) { -// vector> source; -// GenSparseVectZipf(maxSize, source); -// -// LOG(LIB_INFO) << "testing maxSize: " << maxSize << "\nqty: " << source.size() -// << " maxId: " << source.back().id_; -// -// char* pBuff = NULL; -// size_t dataLen = 0; -// -// PackSparseElements(source, pBuff, dataLen); -// -// vector> target; -// UnpackSparseElements(pBuff, dataLen, target); -// -// bool eqFlag = checkElemVectEq(source, target); -// -// if (!eqFlag) { -// LOG(LIB_INFO) << "Different source and target, source.size(): " << source.size() -// << " target.size(): " << target.size(); -// // Let's print the first different in the case of equal # of elements -// size_t i = 0; -// for (; i < min(source.size(), target.size()); ++i) { -// if (!(source[i] == target[i])) { -// LOG(LIB_INFO) << "First diff, i = " << i << " " << source[i] << " vs " << target[i]; -// break; -// } -// } -// } -// -// EXPECT_EQ(eqFlag, true); -// } -//} -// -//TEST(BlockZeros) { -// for (size_t id = 0 ; id <= 3*65536; id++) { -// size_t id1 = removeBlockZeros(id); -// -// size_t id2 = addBlockZeros(id1); -// EXPECT_EQ(id, id2); -// } -//} -// -//#ifdef DISABLE_LONG_TESTS -//TEST(DISABLE_SparsePackUnpack) { -//#else -//TEST(SparsePackUnpack) { -//#endif -// TestSparsePackUnpack(); -// TestSparsePackUnpack(); -//} -// -//TEST(TestEfficientPower) { -// double f = 2.0; -// -// for (unsigned i = 1; i <= 64; i++) { -// double p1 = std::pow(f, i); -// double p2 = EfficientPow(f, i); -// -// EXPECT_EQ(p1, p2); -// } -//} -// -//TEST(TestEfficientFract) { -// unsigned MaxNumDig = 16; -// -// for (float a = 1.1f ; a <= 2.0f; a+= 0.1f) { -// for (unsigned NumDig = 1; NumDig < MaxNumDig; ++NumDig) { -// uint64_t MaxFract = uint64_t(1) << NumDig; -// -// for (uint64_t intFract = 0; intFract < MaxFract; ++intFract) { -// float fract = float(intFract) / float(MaxFract); -// float v1 = pow(a, fract); -// float v2 = EfficientFractPow(a, fract, NumDig); -// -// EXPECT_EQ_EPS(v1, v2, 1e-5f); -// } -// } -// } -//} -// -//template -//bool TestScalarProductAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// float maxRelDiff = 1e-6f; -// float maxAbsDiff = 1e-6f; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(1), T(2), true /* do normalize */); -// GenRandVect(pVect2, dim, T(1), T(2), true /* do normalize */); -// -// T val1 = ScalarProduct(pVect1, pVect2, dim); -// T val2 = ScalarProductSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// T diff = fabs(val1 - val2); -// T diffRel = diff/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// if (diffRel > maxRelDiff && diff > maxAbsDiff) { -// bug = true; -// cerr << "Bug ScalarProduct !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " diff=" << diff << " diffRel=" << diffRel << endl; -// } -// -// if (bug) return false; -// } -// } -// -// return true; -//} -// -//template -//bool TestNormScalarProductAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// float maxRelDiff = 1e-6f; -// float maxAbsDiff = 1e-6f; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(1), T(2), true /* do normalize */); -// GenRandVect(pVect2, dim, T(1), T(2), true /* do normalize */); -// -// T val1 = NormScalarProduct(pVect1, pVect2, dim); -// T val2 = NormScalarProductSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// T diff = fabs(val1 - val2); -// T diffRel = diff/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// if (diffRel > maxRelDiff && diff > maxAbsDiff) { -// bug = true; -// cerr << "Bug NormScalarProduct !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " diff=" << diff << " diffRel=" << diffRel << endl; -// } -// -// if (bug) return false; -// } -// } -// -// return true; -//} -// -//// Agreement test functions -//template -//bool TestLInfAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); -// GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); -// -// T val1 = LInfNormStandard(pVect1, pVect2, dim); -// T val2 = LInfNorm(pVect1, pVect2, dim); -// T val3 = LInfNormSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// -// if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug LInf !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; -// bug = true; -// } -// if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug LInf !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; -// bug = true; -// } -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestL1Agree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); -// GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); -// -// T val1 = L1NormStandard(pVect1, pVect2, dim); -// T val2 = L1Norm(pVect1, pVect2, dim); -// T val3 = L1NormSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// -// if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug L1 !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; -// bug = true; -// } -// if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug L1 !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; -// bug = true; -// } -// if (bug) return false; -// } -// } -// -// return true; -//} -// -//template -//bool TestL2Agree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); -// GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); -// -// T val1 = L2NormStandard(pVect1, pVect2, dim); -// T val2 = L2Norm(pVect1, pVect2, dim); -// T val3 = L2NormSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// -// if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug L2 !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; -// bug = true; -// } -// if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { -// cerr << "Bug L2 !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; -// bug = true; -// } -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestItakuraSaitoAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// vector precompVect1(dim *2), precompVect2(dim * 2); -// T* pPrecompVect1 = &precompVect1[0]; -// T* pPrecompVect2 = &precompVect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); -// -// copy(pVect1, pVect1 + dim, pPrecompVect1); -// copy(pVect2, pVect2 + dim, pPrecompVect2); -// -// PrecompLogarithms(pPrecompVect1, dim); -// PrecompLogarithms(pPrecompVect2, dim); -// -// T val0 = ItakuraSaito(pVect1, pVect2, dim); -// T val1 = ItakuraSaitoPrecomp(pPrecompVect1, pPrecompVect2, dim); -// T val2 = ItakuraSaitoPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug ItakuraSaito !!! Dim = " << dim << " val1 = " << val1 << " val0 = " << val0 << " Diff: " << (val1 - val0) << " RelDiff1: " << RelDiff1 << " << AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// T AbsDiff2 = fabs(val1 - val2); -// T RelDiff2 = AbsDiff2/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { -// cerr << "Bug ItakuraSaito !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " Diff: " << (val1 - val2) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestKLAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// vector precompVect1(dim *2), precompVect2(dim * 2); -// T* pPrecompVect1 = &precompVect1[0]; -// T* pPrecompVect2 = &precompVect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); -// -// copy(pVect1, pVect1 + dim, pPrecompVect1); -// copy(pVect2, pVect2 + dim, pPrecompVect2); -// -// PrecompLogarithms(pPrecompVect1, dim); -// PrecompLogarithms(pPrecompVect2, dim); -// -// T val0 = KLStandard(pVect1, pVect2, dim); -// T val1 = KLStandardLogDiff(pVect1, pVect2, dim); -// T val2 = KLPrecomp(pPrecompVect1, pPrecompVect2, dim); -// T val3 = KLPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); -// -// bool bug = false; -// -// /* -// * KLStandardLog has a worse accuracy due to computing the log of ratios -// * as opposed to difference of logs, but it is more efficient (log can be -// * expensive to compute) -// */ -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug KL !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// T AbsDiff2 = fabs(val1 - val2); -// T RelDiff2 = AbsDiff2/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { -// cerr << "Bug KL !!! Dim = " << dim << " val2 = " << val2 << " val1 = " << val1 << " Diff: " << (val2 - val1) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; -// bug = true; -// } -// -// T AbsDiff3 = fabs(val1 - val3); -// T RelDiff3 = AbsDiff3/max(max(fabs(val1),fabs(val3)),T(1e-18)); -// if (RelDiff3 > 1e-5 && AbsDiff3 > 1e-5) { -// cerr << "Bug KL !!! Dim = " << dim << " val3 = " << val3 << " val1 = " << val1 << " Diff: " << (val3 - val1) << " RelDiff3: " << RelDiff3 << " AbsDiff3: " << AbsDiff3 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestKLGeneralAgree(size_t N, size_t dim, size_t Rep) { -// T* pVect1 = new T[dim]; -// T* pVect2 = new T[dim]; -// T* pPrecompVect1 = new T[dim * 2]; -// T* pPrecompVect2 = new T[dim * 2]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), false); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), false); -// -// copy(pVect1, pVect1 + dim, pPrecompVect1); -// copy(pVect2, pVect2 + dim, pPrecompVect2); -// -// PrecompLogarithms(pPrecompVect1, dim); -// PrecompLogarithms(pPrecompVect2, dim); -// -// T val0 = KLGeneralStandard(pVect1, pVect2, dim); -// T val2 = KLGeneralPrecomp(pPrecompVect1, pPrecompVect2, dim); -// T val3 = KLGeneralPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val2 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val2),fabs(val0)),T(1e-18)); -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug KL !!! Dim = " << dim << " val0 = " << val0 << " val2 = " << val2 << " Diff: " << (val0 - val2) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// T AbsDiff2 = fabs(val3 - val2); -// T RelDiff2 = AbsDiff2/max(max(fabs(val3),fabs(val2)),T(1e-18)); -// if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { -// cerr << "Bug KL !!! Dim = " << dim << " val2 = " << val2 << " val3 = " << val3 << " Diff: " << (val2 - val3) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestJSAgree(size_t N, size_t dim, size_t Rep, double pZero) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// vector precompVect1(dim *2), precompVect2(dim * 2); -// T* pPrecompVect1 = &precompVect1[0]; -// T* pPrecompVect2 = &precompVect2[0]; -// -// T Dist = 0; -// T Error = 0; -// T TotalQty = 0; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); -// SetRandZeros(pVect1, dim, pZero); -// Normalize(pVect1, dim); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); -// SetRandZeros(pVect2, dim, pZero); -// Normalize(pVect2, dim); -// -// copy(pVect1, pVect1 + dim, pPrecompVect1); -// copy(pVect2, pVect2 + dim, pPrecompVect2); -// -// PrecompLogarithms(pPrecompVect1, dim); -// PrecompLogarithms(pPrecompVect2, dim); -// -// T val0 = JSStandard(pVect1, pVect2, dim); -// T val1 = JSPrecomp(pPrecompVect1, pPrecompVect2, dim); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug JS (1) " << typeid(T).name() << " !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// T val2 = JSPrecompApproxLog(pPrecompVect1, pPrecompVect2, dim); -// T val3 = JSPrecompSIMDApproxLog(pPrecompVect1, pPrecompVect2, dim); -// -// T AbsDiff2 = fabs(val2 - val3); -// T RelDiff2 = AbsDiff2/max(max(fabs(val2),fabs(val3)),T(1e-18)); -// -// if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { -// cerr << "Bug JS (2) " << typeid(T).name() << " !!! Dim = " << dim << " val2 = " << val2 << " val3 = " << val3 << " Diff: " << (val2 - val3) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; -// bug = true; -// } -// -// T AbsDiff3 = fabs(val1 - val2); -// T RelDiff3 = AbsDiff3/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// -// Dist += val1; -// Error += AbsDiff3; -// ++TotalQty; -// -// if (RelDiff3 > 1e-4 && AbsDiff3 > 1e-4) { -// cerr << "Bug JS (3) " << typeid(T).name() << " !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " Diff: " << (val1 - val2) << " RelDiff3: " << RelDiff3 << " AbsDiff2: " << AbsDiff3 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// LOG(LIB_INFO) << typeid(T).name() << " JS approximation error: average absolute: " << Error / TotalQty << -// " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; -// -// -// return true; -//} -// -//template -//bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// T Dist = 0; -// T Error = 0; -// T TotalQty = 0; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); -// -// Normalize(pVect1, dim); -// Normalize(pVect2, dim); -// -// T val0 = renyiDivergenceSlow(pVect1, pVect2, dim, alpha); -// T val1 = renyiDivergenceFast(pVect1, pVect2, dim, alpha); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// -// Error += AbsDiff1; -// ++TotalQty; -// -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug Reniy Div. (1) " << typeid(T).name() << " !!! Dim = " << dim -// << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 -// << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 -// << " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// LOG(LIB_INFO) << typeid(T).name() << " Renyi Div. approximation error: average absolute: " << Error / TotalQty << -// " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; -// -// -// return true; -//} -// -//template -//bool TestAlphaBetaDivAgree(size_t N, size_t dim, size_t Rep, T alpha, T beta) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// T Dist = 0; -// T Error = 0; -// T TotalQty = 0; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); -// GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); -// -// Normalize(pVect1, dim); -// Normalize(pVect2, dim); -// -// T val0 = alphaBetaDivergenceSlow(pVect1, pVect2, dim, alpha, beta); -// T val1 = alphaBetaDivergenceFast(pVect1, pVect2, dim, alpha, beta); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// -// Error += AbsDiff1; -// ++TotalQty; -// -// if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { -// cerr << "Bug alpha-beta Div. (1) " << typeid(T).name() << " !!! Dim = " << dim -// << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 -// << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 -// << " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// LOG(LIB_INFO) << typeid(T).name() << " alpha-beta div. approximation error: average absolute: " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; -// -// -// return true; -//} -// -//bool TestSpearmanFootruleAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// PivotIdType* pVect1 = &vect1[0]; -// PivotIdType* pVect2 = &vect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandIntVect(pVect1, dim); -// GenRandIntVect(pVect2, dim); -// -// int val0 = SpearmanFootrule(pVect1, pVect2, dim); -// int val1 = SpearmanFootruleSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// -// -// if (val0 != val1) { -// cerr << "Bug SpearmanFootrule !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//bool TestSpearmanRhoAgree(size_t N, size_t dim, size_t Rep) { -// vector vect1(dim), vect2(dim); -// PivotIdType* pVect1 = &vect1[0]; -// PivotIdType* pVect2 = &vect2[0]; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandIntVect(pVect1, dim); -// GenRandIntVect(pVect2, dim); -// -// int val0 = SpearmanRho(pVect1, pVect2, dim); -// int val1 = SpearmanRhoSIMD(pVect1, pVect2, dim); -// -// bool bug = false; -// -// -// if (val0 != val1) { -// cerr << "Bug SpearmanRho !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// } -// -// -// return true; -//} -// -//template -//bool TestLPGenericAgree(size_t N, size_t dim, size_t Rep, T power) { -// vector vect1(dim), vect2(dim); -// T* pVect1 = &vect1[0]; -// T* pVect2 = &vect2[0]; -// -// T TotalQty = 0, Error = 0, Dist = 0; -// -// for (size_t i = 0; i < Rep; ++i) { -// for (size_t j = 1; j < N; ++j) { -// GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); -// GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); -// -// T val0 = LPGenericDistance(pVect1, pVect2, dim, power); -// T val1 = LPGenericDistanceOptim(pVect1, pVect2, dim, power); -// -// bool bug = false; -// -// T AbsDiff1 = fabs(val1 - val0); -// T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); -// -// T maxRelDiff = 1e-5f; -// T maxAbsDiff = 1e-5f; -// /* -// * For large powers, the difference can be larger, -// * because our approximations are efficient, but not very -// * precise -// */ -// if (power > 8) { maxAbsDiff = maxRelDiff = 1e-3f;} -// if (power > 12) { maxAbsDiff = maxRelDiff = 0.01f;} -// if (power > 22) { maxAbsDiff = maxRelDiff = 0.1f;} -// -// ++TotalQty; -// Error += RelDiff1; -// Dist += val0; -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug LP" << power << " !!! Dim = " << dim << -// " val1 = " << val1 << " val0 = " << val0 << -// " Diff: " << (val1 - val0) << -// " RelDiff1: " << RelDiff1 << -// " (max for this power: " << maxRelDiff << ") " << -// " AbsDiff1: " << AbsDiff1 << " (max for this power: " << maxAbsDiff << ")" << endl; -// } -// -// if (bug) return false; -// } -// } -// -// if (power < 4) { -// LOG(LIB_INFO) << typeid(T).name() << " LP approximation error: average absolute " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; -// -// } -// -// return true; -//} -// -//bool TestBitHammingAgree(size_t N, size_t dim, size_t Rep) { -// size_t WordQty = (dim + 31)/32; -// vector arr(N * WordQty); -// uint32_t* pArr = &arr[0]; -// -// uint32_t *p = pArr; -// for (size_t i = 0; i < N; ++i, p+= WordQty) { -// vector perm(dim); -// GenRandIntVect(&perm[0], dim); -// for (unsigned j = 0; j < dim; ++j) -// perm[j] = perm[j] % 2; -// vector h; -// Binarize(perm, 1, h); -// CHECK(h.size() == WordQty); -// memcpy(p, &h[0], WordQty * sizeof(h[0])); -// } -// -// WallClockTimer t; -// -// t.reset(); -// -// bool res = true; -// -// for (size_t j = 1; j < N; ++j) { -// uint32_t* pVect1 = pArr + j*WordQty; -// uint32_t* pVect2 = pArr + (j-1)*WordQty; -// int d1 = BitHamming(pVect1, pVect2, WordQty); -// int d2 = 0; -// -// for (unsigned t = 0; t < WordQty; ++t) { -// for (unsigned k = 0; k < 32; ++k) { -// d2 += ((pVect1[t]>>k)&1) != ((pVect2[t]>>k)&1); -// } -// } -// if (d1 != d2) { -// cerr << "Bug bit hamming, WordQty = " << WordQty << " d1 = " << d1 << " d2 = " << d2 << endl; -// res = false; -// break; -// } -// } -// -// return res; -//} -// -// -//bool TestSparseAngularDistanceAgree(const string& dataFile, size_t N, size_t Rep) { -// typedef float T; -// -// unique_ptr spaceFast(new SpaceSparseAngularDistanceFast()); -// unique_ptr> spaceReg(new SpaceSparseAngularDistance()); -// -// ObjectVector elemsFast; -// ObjectVector elemsReg; -// vector tmp; -// -// unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); -// spaceFast->UpdateParamsFromFile(*inpStateFast); -// unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); -// spaceReg->UpdateParamsFromFile(*inpStateReg); -// -// CHECK(elemsFast.size() == elemsReg.size()); -// -// N = min(N, elemsReg.size()); -// -// bool bug = false; -// -// float maxRelDiff = 2e-5f; -// float maxAbsDiff = 1e-6f; -// -// for (size_t j = Rep; j < N; ++j) -// for (size_t k = j - Rep; k < j; ++k) { -// float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); -// float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); -// -// float AbsDiff1 = fabs(val1 - val2); -// float RelDiff1 = AbsDiff1 / max(max(fabs(val1), fabs(val2)), T(1e-18)); -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug fast vs non-fast angular dist " << -// " val1 = " << val1 << " val2 = " << val2 << -// " Diff: " << (val1 - val2) << -// " RelDiff1: " << RelDiff1 << -// " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// -// return true; -//} -// -// -// -//bool TestSparseCosineSimilarityAgree(const string& dataFile, size_t N, size_t Rep) { -// typedef float T; -// -// unique_ptr spaceFast(new SpaceSparseCosineSimilarityFast()); -// unique_ptr> spaceReg (new SpaceSparseCosineSimilarity()); -// -// ObjectVector elemsFast; -// ObjectVector elemsReg; -// vector tmp; -// -// unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); -// spaceFast->UpdateParamsFromFile(*inpStateFast); -// unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); -// spaceReg->UpdateParamsFromFile(*inpStateReg); -// -// CHECK(elemsFast.size() == elemsReg.size()); -// -// N = min(N, elemsReg.size()); -// -// bool bug = false; -// -// float maxRelDiff = 1e-5f; -// float maxAbsDiff = 1e-5f; -// -// for (size_t j = Rep; j < N; ++j) -// for (size_t k = j - Rep; k < j; ++k) { -// float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); -// float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); -// -// float AbsDiff1 = fabs(val1 - val2); -// float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug fast vs non-fast cosine " << -// " val1 = " << val1 << " val2 = " << val2 << -// " Diff: " << (val1 - val2) << -// " RelDiff1: " << RelDiff1 << -// " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// -// return true; -//} -// -//bool TestSparseNegativeScalarProductAgree(const string& dataFile, size_t N, size_t Rep) { -// typedef float T; -// -// unique_ptr spaceFast(new SpaceSparseNegativeScalarProductFast()); -// unique_ptr> spaceReg (new SpaceSparseNegativeScalarProduct()); -// -// ObjectVector elemsFast; -// ObjectVector elemsReg; -// vector tmp; -// -// unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); -// spaceFast->UpdateParamsFromFile(*inpStateFast); -// unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); -// spaceReg->UpdateParamsFromFile(*inpStateReg); -// -// CHECK(elemsFast.size() == elemsReg.size()); -// -// N = min(N, elemsReg.size()); -// -// bool bug = false; -// -// float maxRelDiff = 1e-6f; -// float maxAbsDiff = 1e-6f; -// -// for (size_t j = Rep; j < N; ++j) -// for (size_t k = j - Rep; k < j; ++k) { -// float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); -// float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); -// -// float AbsDiff1 = fabs(val1 - val2); -// float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug fast vs non-fast negative scalar/dot product " << -// " val1 = " << val1 << " val2 = " << val2 << -// " Diff: " << (val1 - val2) << -// " RelDiff1: " << RelDiff1 << -// " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// -// return true; -//} -// -//bool TestSparseQueryNormNegativeScalarProductAgree(const string& dataFile, size_t N, size_t Rep) { -// typedef float T; -// -// unique_ptr spaceFast(new SpaceSparseQueryNormNegativeScalarProductFast()); -// unique_ptr> spaceReg (new SpaceSparseQueryNormNegativeScalarProduct()); -// -// ObjectVector elemsFast; -// ObjectVector elemsReg; -// vector tmp; -// -// unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); -// spaceFast->UpdateParamsFromFile(*inpStateFast); -// unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); -// spaceReg->UpdateParamsFromFile(*inpStateReg); -// -// CHECK(elemsFast.size() == elemsReg.size()); -// -// N = min(N, elemsReg.size()); -// -// bool bug = false; -// -// float maxRelDiff = 1e-6f; -// float maxAbsDiff = 1e-6f; -// -// for (size_t j = Rep; j < N; ++j) -// for (size_t k = j - Rep; k < j; ++k) { -// float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); -// float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); -// -// float AbsDiff1 = fabs(val1 - val2); -// float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug fast vs non-fast QUERY-NORMALIZED negative scalar/dot product " << -// " val1 = " << val1 << " val2 = " << val2 << -// " Diff: " << (val1 - val2) << -// " RelDiff1: " << RelDiff1 << -// " AbsDiff1: " << AbsDiff1 << endl; -// bug = true; -// } -// -// if (bug) return false; -// } -// -// return true; -//} -// -//// Limitation: this is only for spaces without params -//bool TestPivotIndex(const string& spaceName, -// bool useDummyIndex, -// const string& dataFile, size_t dataQty, -// const string& pivotFile, size_t pivotQty) { -// -// LOG(LIB_INFO) << "space: " << spaceName << " real pivot index?: " << !useDummyIndex << " " << -// " dataFile: " << dataFile << " " << -// " pivotFile: " << pivotFile; -// try { -// typedef float T; -// -// AnyParams emptyParams; -// -// unique_ptr> space(SpaceFactoryRegistry::Instance().CreateSpace(spaceName, emptyParams)); -// -// ObjectVector data; -// ObjectVector pivots; -// vector tmp; -// -// float maxRelDiff = 1e-6f; -// float maxAbsDiff = 1e-6f; -// -// unique_ptr inpStateFast(space->ReadDataset(data, tmp, dataFile, dataQty)); -// space->UpdateParamsFromFile(*inpStateFast); -// space->ReadDataset(pivots, tmp, pivotFile, pivotQty); -// -// unique_ptr> pivIndx(useDummyIndex ? -// new DummyPivotIndex(*space, pivots) -// : -// space->CreatePivotIndex(pivots, -// 0 /* Let's not test using the hashing trick here, b/c distances would be somewhat different */)); -// -// for (size_t did = 0; did < dataQty; ++did) { -// vector vDst; -// pivIndx->ComputePivotDistancesIndexTime(data[did], vDst); -// CHECK_MSG(vDst.size() == pivotQty, "ComputePivotDistancesIndexTime returns incorrect # of elements different from the # of pivots"); -// -// for (size_t pid = 0; pid < pivotQty; ++pid) { -// T val2 = space->IndexTimeDistance(pivots[pid], data[did]); -// T val1 = vDst[pid]; -// -// float AbsDiff1 = fabs(val1 - val2); -// float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); -// -// if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { -// cerr << "Bug in fast computation of all-pivot distance, " << -// " space: " << spaceName << " real pivot index?: " << !useDummyIndex << endl << -// " dataFile: " << dataFile << endl << -// " pivotFile: " << pivotFile << endl << -// " data index: " << did << " pivot index: " << pid << endl << -// " val1 = " << val1 << " val2 = " << val2 << -// " Diff: " << (val1 - val2) << -// " RelDiff1: " << RelDiff1 << -// " AbsDiff1: " << AbsDiff1 << endl; -// return false; -// } -// } -// } -// } catch (const exception& e) { -// LOG(LIB_INFO) << "Got exception while testing: " << e.what(); -// return false; -// } -// return true; -//} -// -// -// -// -//#ifdef DISABLE_LONG_TESTS -//TEST(DISABLE_TestAgree) { -//#else -//TEST(TestAgree) { -//#endif -// int nTest = 0; -// int nFail = 0; -// -// nTest++; -// nFail += !TestSparseAngularDistanceAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseAngularDistanceAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseCosineSimilarityAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseCosineSimilarityAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); -// -// -// nTest++; -// nFail += !TestSparseNegativeScalarProductAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseNegativeScalarProductAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseQueryNormNegativeScalarProductAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); -// -// nTest++; -// nFail += !TestSparseQueryNormNegativeScalarProductAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); -// -// -// /* -// * 32 should be more than enough for almost all methods, -// * where loop-unrolling includes at most 16 distance computations. -// * -// * Bit-Hamming is an exception. -// * -// */ -// for (unsigned dim = 1; dim <= 1024; dim+=2) { -// LOG(LIB_INFO) << "Dim = " << dim; -// -// nFail += !TestBitHammingAgree(1000, dim, 1000); -// } -// -// for (unsigned dim = 1; dim <= 32; ++dim) { -// LOG(LIB_INFO) << "Dim = " << dim; -// -// /* -// * This is a costly check, we don't need to do it for large # dimensions. -// * Anyways, the function is not using any loop unrolling, so 8 should be sufficient. -// */ -// if (dim <= 8) { -// -// for (float power = 0.125; power <= 32; power += 0.125) { -// TestLPGenericAgree(1024, dim, 10, power); -// } -// for (double power = 0.125; power <= 32; power += 0.125) { -// TestLPGenericAgree(1024, dim, 10, power); -// } -// -// // In the case of Renyi divergence 0 < alpha < 1, 1 < alpha < infinity -// // https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy#R%C3%A9nyi_divergence -// for (float alpha = 0.125; alpha <= 2; alpha += 0.125) { -// if (fabs(alpha - 1) < 1e-6) continue; -// TestRenyiDivAgree(1024, dim, 10, alpha); -// } -// for (double alpha = 0.125; alpha <= 2; alpha += 0.125) { -// if (fabs(alpha - 1) < 1e-6) continue; -// TestRenyiDivAgree(1024, dim, 10, alpha); -// } -// -// for (float alpha = -2; alpha <= 2; alpha += 0.5) -// for (float beta = -2; beta <= 2; beta += 0.5) -// { -// TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); -// } -// -// for (double alpha = -2; alpha <= 2; alpha += 0.5) -// for (double beta = -2; beta <= 2; beta += 0.5) -// { -// TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); -// } -// } -// -// nTest++; -// nFail += !TestNormScalarProductAgree(1024, dim, 10); -// nTest++; -// nFail += !TestNormScalarProductAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestScalarProductAgree(1024, dim, 10); -// nTest++; -// nFail += !TestScalarProductAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestSpearmanFootruleAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestSpearmanRhoAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestJSAgree(1024, dim, 10, 0.5); -// nTest++; -// nFail += !TestJSAgree(1024, dim, 10, 0.5); -// -// nTest++; -// nFail += !TestKLGeneralAgree(1024, dim, 10); -// nTest++; -// nFail += !TestKLGeneralAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestLInfAgree(1024, dim, 10); -// nTest++; -// nFail += !TestLInfAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestL1Agree(1024, dim, 10); -// nTest++; -// nFail += !TestL1Agree(1024, dim, 10); -// -// nTest++; -// nFail += !TestL2Agree(1024, dim, 10); -// nTest++; -// nFail += !TestL2Agree(1024, dim, 10); -// -// nTest++; -// nFail += !TestKLAgree(1024, dim, 10); -// nTest++; -// nFail += !TestKLAgree(1024, dim, 10); -// -// nTest++; -// nFail += !TestItakuraSaitoAgree(1024, dim, 10); -// nTest++; -// nFail += !TestItakuraSaitoAgree(1024, dim, 10); -// } -// -// LOG(LIB_INFO) << nTest << " (sub) tests performed " << nFail << " failed"; -// -// EXPECT_EQ(0, nFail); -//} -// -//#ifdef DISABLE_LONG_TESTS -//TEST(DISABLE_TestAgreePivotIndex) { -//#else -//TEST(TestAgreePivotIndex) { -//#endif -// int nTest = 0; -// int nFail = 0; -// -// const size_t dataQty = 1000; -// const size_t pivotQty = 100; -// -// vector vDataFiles = {"sparse_5K.txt", "sparse_wiki_5K.txt"}; -// vector vSpaces = {SPACE_SPARSE_COSINE_SIMILARITY_FAST, SPACE_SPARSE_ANGULAR_DISTANCE_FAST, -// SPACE_SPARSE_NEGATIVE_SCALAR_FAST, SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR_FAST}; -// const string pivotFile = "sparse_pivots1K_termQty5K_maxId_100K.txt"; -// -// for (string spaceName : vSpaces) -// for (string dataFile : vDataFiles) { -// // 1. test with a dummy pivot index -// nTest++; -// nFail += !TestPivotIndex(spaceName, true, sampleDataPrefix + dataFile, dataQty, sampleDataPrefix + pivotFile, pivotQty); -// -// // 2. test with a real pivot index -// nTest++; -// nFail += !TestPivotIndex(spaceName, false, sampleDataPrefix + dataFile, dataQty, sampleDataPrefix + pivotFile, pivotQty); -// } -// -// LOG(LIB_INFO) << nTest << " (sub) tests performed " << nFail << " failed"; -// -// EXPECT_EQ(0, nFail); -//} -// -// +TEST(Platform64) { + EXPECT_EQ(8 == sizeof(size_t), true); +} + +template +bool checkElemVectEq(const vector>& source, + const vector>& target) { + if (source.size() != target.size()) return false; + + for (size_t i = 0; i < source.size(); ++i) + if (source[i] != target[i]) return false; + + return true; +} + +template +void TestSparsePackUnpack() { + for (size_t maxSize = 1024 ; maxSize < 1024*1024; maxSize += 8192) { + vector> source; + GenSparseVectZipf(maxSize, source); + + LOG(LIB_INFO) << "testing maxSize: " << maxSize << "\nqty: " << source.size() + << " maxId: " << source.back().id_; + + char* pBuff = NULL; + size_t dataLen = 0; + + PackSparseElements(source, pBuff, dataLen); + + vector> target; + UnpackSparseElements(pBuff, dataLen, target); + + bool eqFlag = checkElemVectEq(source, target); + + if (!eqFlag) { + LOG(LIB_INFO) << "Different source and target, source.size(): " << source.size() + << " target.size(): " << target.size(); + // Let's print the first different in the case of equal # of elements + size_t i = 0; + for (; i < min(source.size(), target.size()); ++i) { + if (!(source[i] == target[i])) { + LOG(LIB_INFO) << "First diff, i = " << i << " " << source[i] << " vs " << target[i]; + break; + } + } + } + + EXPECT_EQ(eqFlag, true); + } +} + +TEST(BlockZeros) { + for (size_t id = 0 ; id <= 3*65536; id++) { + size_t id1 = removeBlockZeros(id); + + size_t id2 = addBlockZeros(id1); + EXPECT_EQ(id, id2); + } +} + +#ifdef DISABLE_LONG_TESTS +TEST(DISABLE_SparsePackUnpack) { +#else +TEST(SparsePackUnpack) { +#endif + TestSparsePackUnpack(); + TestSparsePackUnpack(); +} + +TEST(TestEfficientPower) { + double f = 2.0; + + for (unsigned i = 1; i <= 64; i++) { + double p1 = std::pow(f, i); + double p2 = EfficientPow(f, i); + + EXPECT_EQ(p1, p2); + } +} + +TEST(TestEfficientFract) { + unsigned MaxNumDig = 16; + + for (float a = 1.1f ; a <= 2.0f; a+= 0.1f) { + for (unsigned NumDig = 1; NumDig < MaxNumDig; ++NumDig) { + uint64_t MaxFract = uint64_t(1) << NumDig; + + for (uint64_t intFract = 0; intFract < MaxFract; ++intFract) { + float fract = float(intFract) / float(MaxFract); + float v1 = pow(a, fract); + float v2 = EfficientFractPow(a, fract, NumDig); + + EXPECT_EQ_EPS(v1, v2, 1e-5f); + } + } + } +} + +template +bool TestScalarProductAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + float maxRelDiff = 1e-6f; + float maxAbsDiff = 1e-6f; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(1), T(2), true /* do normalize */); + GenRandVect(pVect2, dim, T(1), T(2), true /* do normalize */); + + T val1 = ScalarProduct(pVect1, pVect2, dim); + T val2 = ScalarProductSIMD(pVect1, pVect2, dim); + + bool bug = false; + T diff = fabs(val1 - val2); + T diffRel = diff/max(max(fabs(val1),fabs(val2)),T(1e-18)); + if (diffRel > maxRelDiff && diff > maxAbsDiff) { + bug = true; + cerr << "Bug ScalarProduct !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " diff=" << diff << " diffRel=" << diffRel << endl; + } + + if (bug) return false; + } + } + + return true; +} + +template +bool TestNormScalarProductAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + float maxRelDiff = 1e-6f; + float maxAbsDiff = 1e-6f; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(1), T(2), true /* do normalize */); + GenRandVect(pVect2, dim, T(1), T(2), true /* do normalize */); + + T val1 = NormScalarProduct(pVect1, pVect2, dim); + T val2 = NormScalarProductSIMD(pVect1, pVect2, dim); + + bool bug = false; + T diff = fabs(val1 - val2); + T diffRel = diff/max(max(fabs(val1),fabs(val2)),T(1e-18)); + if (diffRel > maxRelDiff && diff > maxAbsDiff) { + bug = true; + cerr << "Bug NormScalarProduct !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " diff=" << diff << " diffRel=" << diffRel << endl; + } + + if (bug) return false; + } + } + + return true; +} + +// Agreement test functions +template +bool TestLInfAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); + GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); + + T val1 = LInfNormStandard(pVect1, pVect2, dim); + T val2 = LInfNorm(pVect1, pVect2, dim); + T val3 = LInfNormSIMD(pVect1, pVect2, dim); + + bool bug = false; + + if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug LInf !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; + bug = true; + } + if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug LInf !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; + bug = true; + } + if (bug) return false; + } + } + + + return true; +} + +template +bool TestL1Agree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); + GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); + + T val1 = L1NormStandard(pVect1, pVect2, dim); + T val2 = L1Norm(pVect1, pVect2, dim); + T val3 = L1NormSIMD(pVect1, pVect2, dim); + + bool bug = false; + + if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug L1 !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; + bug = true; + } + if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug L1 !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; + bug = true; + } + if (bug) return false; + } + } + + return true; +} + +template +bool TestL2Agree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); + GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); + + T val1 = L2NormStandard(pVect1, pVect2, dim); + T val2 = L2Norm(pVect1, pVect2, dim); + T val3 = L2NormSIMD(pVect1, pVect2, dim); + + bool bug = false; + + if (fabs(val1 - val2)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug L2 !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << endl; + bug = true; + } + if (fabs(val1 - val3)/max(max(val1,val2),T(1e-18)) > 1e-6) { + cerr << "Bug L2 !!! Dim = " << dim << " val1 = " << val1 << " val3 = " << val3 << endl; + bug = true; + } + if (bug) return false; + } + } + + + return true; +} + +template +bool TestItakuraSaitoAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + vector precompVect1(dim *2), precompVect2(dim * 2); + T* pPrecompVect1 = &precompVect1[0]; + T* pPrecompVect2 = &precompVect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + copy(pVect1, pVect1 + dim, pPrecompVect1); + copy(pVect2, pVect2 + dim, pPrecompVect2); + + PrecompLogarithms(pPrecompVect1, dim); + PrecompLogarithms(pPrecompVect2, dim); + + T val0 = ItakuraSaito(pVect1, pVect2, dim); + T val1 = ItakuraSaitoPrecomp(pPrecompVect1, pPrecompVect2, dim); + T val2 = ItakuraSaitoPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug ItakuraSaito !!! Dim = " << dim << " val1 = " << val1 << " val0 = " << val0 << " Diff: " << (val1 - val0) << " RelDiff1: " << RelDiff1 << " << AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + T AbsDiff2 = fabs(val1 - val2); + T RelDiff2 = AbsDiff2/max(max(fabs(val1),fabs(val2)),T(1e-18)); + if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { + cerr << "Bug ItakuraSaito !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " Diff: " << (val1 - val2) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; + bug = true; + } + + if (bug) return false; + } + } + + + return true; +} + +template +bool TestKLAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + vector precompVect1(dim *2), precompVect2(dim * 2); + T* pPrecompVect1 = &precompVect1[0]; + T* pPrecompVect2 = &precompVect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + copy(pVect1, pVect1 + dim, pPrecompVect1); + copy(pVect2, pVect2 + dim, pPrecompVect2); + + PrecompLogarithms(pPrecompVect1, dim); + PrecompLogarithms(pPrecompVect2, dim); + + T val0 = KLStandard(pVect1, pVect2, dim); + T val1 = KLStandardLogDiff(pVect1, pVect2, dim); + T val2 = KLPrecomp(pPrecompVect1, pPrecompVect2, dim); + T val3 = KLPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); + + bool bug = false; + + /* + * KLStandardLog has a worse accuracy due to computing the log of ratios + * as opposed to difference of logs, but it is more efficient (log can be + * expensive to compute) + */ + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug KL !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + T AbsDiff2 = fabs(val1 - val2); + T RelDiff2 = AbsDiff2/max(max(fabs(val1),fabs(val2)),T(1e-18)); + if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { + cerr << "Bug KL !!! Dim = " << dim << " val2 = " << val2 << " val1 = " << val1 << " Diff: " << (val2 - val1) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; + bug = true; + } + + T AbsDiff3 = fabs(val1 - val3); + T RelDiff3 = AbsDiff3/max(max(fabs(val1),fabs(val3)),T(1e-18)); + if (RelDiff3 > 1e-5 && AbsDiff3 > 1e-5) { + cerr << "Bug KL !!! Dim = " << dim << " val3 = " << val3 << " val1 = " << val1 << " Diff: " << (val3 - val1) << " RelDiff3: " << RelDiff3 << " AbsDiff3: " << AbsDiff3 << endl; + bug = true; + } + + if (bug) return false; + } + } + + + return true; +} + +template +bool TestKLGeneralAgree(size_t N, size_t dim, size_t Rep) { + T* pVect1 = new T[dim]; + T* pVect2 = new T[dim]; + T* pPrecompVect1 = new T[dim * 2]; + T* pPrecompVect2 = new T[dim * 2]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), false); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), false); + + copy(pVect1, pVect1 + dim, pPrecompVect1); + copy(pVect2, pVect2 + dim, pPrecompVect2); + + PrecompLogarithms(pPrecompVect1, dim); + PrecompLogarithms(pPrecompVect2, dim); + + T val0 = KLGeneralStandard(pVect1, pVect2, dim); + T val2 = KLGeneralPrecomp(pPrecompVect1, pPrecompVect2, dim); + T val3 = KLGeneralPrecompSIMD(pPrecompVect1, pPrecompVect2, dim); + + bool bug = false; + + T AbsDiff1 = fabs(val2 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val2),fabs(val0)),T(1e-18)); + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug KL !!! Dim = " << dim << " val0 = " << val0 << " val2 = " << val2 << " Diff: " << (val0 - val2) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + T AbsDiff2 = fabs(val3 - val2); + T RelDiff2 = AbsDiff2/max(max(fabs(val3),fabs(val2)),T(1e-18)); + if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { + cerr << "Bug KL !!! Dim = " << dim << " val2 = " << val2 << " val3 = " << val3 << " Diff: " << (val2 - val3) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; + bug = true; + } + + if (bug) return false; + } + } + + + return true; +} + +template +bool TestJSAgree(size_t N, size_t dim, size_t Rep, double pZero) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + vector precompVect1(dim *2), precompVect2(dim * 2); + T* pPrecompVect1 = &precompVect1[0]; + T* pPrecompVect2 = &precompVect2[0]; + + T Dist = 0; + T Error = 0; + T TotalQty = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + SetRandZeros(pVect1, dim, pZero); + Normalize(pVect1, dim); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + SetRandZeros(pVect2, dim, pZero); + Normalize(pVect2, dim); + + copy(pVect1, pVect1 + dim, pPrecompVect1); + copy(pVect2, pVect2 + dim, pPrecompVect2); + + PrecompLogarithms(pPrecompVect1, dim); + PrecompLogarithms(pPrecompVect2, dim); + + T val0 = JSStandard(pVect1, pVect2, dim); + T val1 = JSPrecomp(pPrecompVect1, pPrecompVect2, dim); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug JS (1) " << typeid(T).name() << " !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + T val2 = JSPrecompApproxLog(pPrecompVect1, pPrecompVect2, dim); + T val3 = JSPrecompSIMDApproxLog(pPrecompVect1, pPrecompVect2, dim); + + T AbsDiff2 = fabs(val2 - val3); + T RelDiff2 = AbsDiff2/max(max(fabs(val2),fabs(val3)),T(1e-18)); + + if (RelDiff2 > 1e-5 && AbsDiff2 > 1e-5) { + cerr << "Bug JS (2) " << typeid(T).name() << " !!! Dim = " << dim << " val2 = " << val2 << " val3 = " << val3 << " Diff: " << (val2 - val3) << " RelDiff2: " << RelDiff2 << " AbsDiff2: " << AbsDiff2 << endl; + bug = true; + } + + T AbsDiff3 = fabs(val1 - val2); + T RelDiff3 = AbsDiff3/max(max(fabs(val1),fabs(val2)),T(1e-18)); + + Dist += val1; + Error += AbsDiff3; + ++TotalQty; + + if (RelDiff3 > 1e-4 && AbsDiff3 > 1e-4) { + cerr << "Bug JS (3) " << typeid(T).name() << " !!! Dim = " << dim << " val1 = " << val1 << " val2 = " << val2 << " Diff: " << (val1 - val2) << " RelDiff3: " << RelDiff3 << " AbsDiff2: " << AbsDiff3 << endl; + bug = true; + } + + if (bug) return false; + } + } + + LOG(LIB_INFO) << typeid(T).name() << " JS approximation error: average absolute: " << Error / TotalQty << + " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + + return true; +} + +template +bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + T Dist = 0; + T Error = 0; + T TotalQty = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + Normalize(pVect1, dim); + Normalize(pVect2, dim); + + T val0 = renyiDivergenceSlow(pVect1, pVect2, dim, alpha); + T val1 = renyiDivergenceFast(pVect1, pVect2, dim, alpha); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + Error += AbsDiff1; + ++TotalQty; + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug Reniy Div. (1) " << typeid(T).name() << " !!! Dim = " << dim + << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 + << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 + << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + } + + LOG(LIB_INFO) << typeid(T).name() << " Renyi Div. approximation error: average absolute: " << Error / TotalQty << + " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + + return true; +} + +template +bool TestAlphaBetaDivAgree(size_t N, size_t dim, size_t Rep, T alpha, T beta) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + T Dist = 0; + T Error = 0; + T TotalQty = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + Normalize(pVect1, dim); + Normalize(pVect2, dim); + + T val0 = alphaBetaDivergenceSlow(pVect1, pVect2, dim, alpha, beta); + T val1 = alphaBetaDivergenceFast(pVect1, pVect2, dim, alpha, beta); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + Error += AbsDiff1; + ++TotalQty; + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug alpha-beta Div. (1) " << typeid(T).name() << " !!! Dim = " << dim + << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 + << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 + << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + } + + LOG(LIB_INFO) << typeid(T).name() << " alpha-beta div. approximation error: average absolute: " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + + return true; +} + +bool TestSpearmanFootruleAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + PivotIdType* pVect1 = &vect1[0]; + PivotIdType* pVect2 = &vect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandIntVect(pVect1, dim); + GenRandIntVect(pVect2, dim); + + int val0 = SpearmanFootrule(pVect1, pVect2, dim); + int val1 = SpearmanFootruleSIMD(pVect1, pVect2, dim); + + bool bug = false; + + + if (val0 != val1) { + cerr << "Bug SpearmanFootrule !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << endl; + bug = true; + } + + if (bug) return false; + } + } + + + return true; +} + +bool TestSpearmanRhoAgree(size_t N, size_t dim, size_t Rep) { + vector vect1(dim), vect2(dim); + PivotIdType* pVect1 = &vect1[0]; + PivotIdType* pVect2 = &vect2[0]; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandIntVect(pVect1, dim); + GenRandIntVect(pVect2, dim); + + int val0 = SpearmanRho(pVect1, pVect2, dim); + int val1 = SpearmanRhoSIMD(pVect1, pVect2, dim); + + bool bug = false; + + + if (val0 != val1) { + cerr << "Bug SpearmanRho !!! Dim = " << dim << " val0 = " << val0 << " val1 = " << val1 << " Diff: " << (val0 - val1) << endl; + bug = true; + } + + if (bug) return false; + } + } + + + return true; +} + +template +bool TestLPGenericAgree(size_t N, size_t dim, size_t Rep, T power) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + T TotalQty = 0, Error = 0, Dist = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, -T(RANGE), T(RANGE)); + GenRandVect(pVect2, dim, -T(RANGE), T(RANGE)); + + T val0 = LPGenericDistance(pVect1, pVect2, dim, power); + T val1 = LPGenericDistanceOptim(pVect1, pVect2, dim, power); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + T maxRelDiff = 1e-5f; + T maxAbsDiff = 1e-5f; + /* + * For large powers, the difference can be larger, + * because our approximations are efficient, but not very + * precise + */ + if (power > 8) { maxAbsDiff = maxRelDiff = 1e-3f;} + if (power > 12) { maxAbsDiff = maxRelDiff = 0.01f;} + if (power > 22) { maxAbsDiff = maxRelDiff = 0.1f;} + + ++TotalQty; + Error += RelDiff1; + Dist += val0; + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug LP" << power << " !!! Dim = " << dim << + " val1 = " << val1 << " val0 = " << val0 << + " Diff: " << (val1 - val0) << + " RelDiff1: " << RelDiff1 << + " (max for this power: " << maxRelDiff << ") " << + " AbsDiff1: " << AbsDiff1 << " (max for this power: " << maxAbsDiff << ")" << endl; + } + + if (bug) return false; + } + } + + if (power < 4) { + LOG(LIB_INFO) << typeid(T).name() << " LP approximation error: average absolute " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + } + + return true; +} + +bool TestBitHammingAgree(size_t N, size_t dim, size_t Rep) { + size_t WordQty = (dim + 31)/32; + vector arr(N * WordQty); + uint32_t* pArr = &arr[0]; + + uint32_t *p = pArr; + for (size_t i = 0; i < N; ++i, p+= WordQty) { + vector perm(dim); + GenRandIntVect(&perm[0], dim); + for (unsigned j = 0; j < dim; ++j) + perm[j] = perm[j] % 2; + vector h; + Binarize(perm, 1, h); + CHECK(h.size() == WordQty); + memcpy(p, &h[0], WordQty * sizeof(h[0])); + } + + WallClockTimer t; + + t.reset(); + + bool res = true; + + for (size_t j = 1; j < N; ++j) { + uint32_t* pVect1 = pArr + j*WordQty; + uint32_t* pVect2 = pArr + (j-1)*WordQty; + int d1 = BitHamming(pVect1, pVect2, WordQty); + int d2 = 0; + + for (unsigned t = 0; t < WordQty; ++t) { + for (unsigned k = 0; k < 32; ++k) { + d2 += ((pVect1[t]>>k)&1) != ((pVect2[t]>>k)&1); + } + } + if (d1 != d2) { + cerr << "Bug bit hamming, WordQty = " << WordQty << " d1 = " << d1 << " d2 = " << d2 << endl; + res = false; + break; + } + } + + return res; +} + + +bool TestSparseAngularDistanceAgree(const string& dataFile, size_t N, size_t Rep) { + typedef float T; + + unique_ptr spaceFast(new SpaceSparseAngularDistanceFast()); + unique_ptr> spaceReg(new SpaceSparseAngularDistance()); + + ObjectVector elemsFast; + ObjectVector elemsReg; + vector tmp; + + unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); + spaceFast->UpdateParamsFromFile(*inpStateFast); + unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); + spaceReg->UpdateParamsFromFile(*inpStateReg); + + CHECK(elemsFast.size() == elemsReg.size()); + + N = min(N, elemsReg.size()); + + bool bug = false; + + float maxRelDiff = 2e-5f; + float maxAbsDiff = 1e-6f; + + for (size_t j = Rep; j < N; ++j) + for (size_t k = j - Rep; k < j; ++k) { + float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); + float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); + + float AbsDiff1 = fabs(val1 - val2); + float RelDiff1 = AbsDiff1 / max(max(fabs(val1), fabs(val2)), T(1e-18)); + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug fast vs non-fast angular dist " << + " val1 = " << val1 << " val2 = " << val2 << + " Diff: " << (val1 - val2) << + " RelDiff1: " << RelDiff1 << + " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + + return true; +} + + + +bool TestSparseCosineSimilarityAgree(const string& dataFile, size_t N, size_t Rep) { + typedef float T; + + unique_ptr spaceFast(new SpaceSparseCosineSimilarityFast()); + unique_ptr> spaceReg (new SpaceSparseCosineSimilarity()); + + ObjectVector elemsFast; + ObjectVector elemsReg; + vector tmp; + + unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); + spaceFast->UpdateParamsFromFile(*inpStateFast); + unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); + spaceReg->UpdateParamsFromFile(*inpStateReg); + + CHECK(elemsFast.size() == elemsReg.size()); + + N = min(N, elemsReg.size()); + + bool bug = false; + + float maxRelDiff = 1e-5f; + float maxAbsDiff = 1e-5f; + + for (size_t j = Rep; j < N; ++j) + for (size_t k = j - Rep; k < j; ++k) { + float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); + float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); + + float AbsDiff1 = fabs(val1 - val2); + float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug fast vs non-fast cosine " << + " val1 = " << val1 << " val2 = " << val2 << + " Diff: " << (val1 - val2) << + " RelDiff1: " << RelDiff1 << + " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + + return true; +} + +bool TestSparseNegativeScalarProductAgree(const string& dataFile, size_t N, size_t Rep) { + typedef float T; + + unique_ptr spaceFast(new SpaceSparseNegativeScalarProductFast()); + unique_ptr> spaceReg (new SpaceSparseNegativeScalarProduct()); + + ObjectVector elemsFast; + ObjectVector elemsReg; + vector tmp; + + unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); + spaceFast->UpdateParamsFromFile(*inpStateFast); + unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); + spaceReg->UpdateParamsFromFile(*inpStateReg); + + CHECK(elemsFast.size() == elemsReg.size()); + + N = min(N, elemsReg.size()); + + bool bug = false; + + float maxRelDiff = 1e-6f; + float maxAbsDiff = 1e-6f; + + for (size_t j = Rep; j < N; ++j) + for (size_t k = j - Rep; k < j; ++k) { + float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); + float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); + + float AbsDiff1 = fabs(val1 - val2); + float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug fast vs non-fast negative scalar/dot product " << + " val1 = " << val1 << " val2 = " << val2 << + " Diff: " << (val1 - val2) << + " RelDiff1: " << RelDiff1 << + " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + + return true; +} + +bool TestSparseQueryNormNegativeScalarProductAgree(const string& dataFile, size_t N, size_t Rep) { + typedef float T; + + unique_ptr spaceFast(new SpaceSparseQueryNormNegativeScalarProductFast()); + unique_ptr> spaceReg (new SpaceSparseQueryNormNegativeScalarProduct()); + + ObjectVector elemsFast; + ObjectVector elemsReg; + vector tmp; + + unique_ptr inpStateFast(spaceFast->ReadDataset(elemsFast, tmp, dataFile, N)); + spaceFast->UpdateParamsFromFile(*inpStateFast); + unique_ptr inpStateReg(spaceReg->ReadDataset(elemsReg, tmp, dataFile, N)); + spaceReg->UpdateParamsFromFile(*inpStateReg); + + CHECK(elemsFast.size() == elemsReg.size()); + + N = min(N, elemsReg.size()); + + bool bug = false; + + float maxRelDiff = 1e-6f; + float maxAbsDiff = 1e-6f; + + for (size_t j = Rep; j < N; ++j) + for (size_t k = j - Rep; k < j; ++k) { + float val1 = spaceFast->IndexTimeDistance(elemsFast[k], elemsFast[j]); + float val2 = spaceReg->IndexTimeDistance(elemsReg[k], elemsReg[j]); + + float AbsDiff1 = fabs(val1 - val2); + float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug fast vs non-fast QUERY-NORMALIZED negative scalar/dot product " << + " val1 = " << val1 << " val2 = " << val2 << + " Diff: " << (val1 - val2) << + " RelDiff1: " << RelDiff1 << + " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + + return true; +} + +// Limitation: this is only for spaces without params +bool TestPivotIndex(const string& spaceName, + bool useDummyIndex, + const string& dataFile, size_t dataQty, + const string& pivotFile, size_t pivotQty) { + + LOG(LIB_INFO) << "space: " << spaceName << " real pivot index?: " << !useDummyIndex << " " << + " dataFile: " << dataFile << " " << + " pivotFile: " << pivotFile; + try { + typedef float T; + + AnyParams emptyParams; + + unique_ptr> space(SpaceFactoryRegistry::Instance().CreateSpace(spaceName, emptyParams)); + + ObjectVector data; + ObjectVector pivots; + vector tmp; + + float maxRelDiff = 1e-6f; + float maxAbsDiff = 1e-6f; + + unique_ptr inpStateFast(space->ReadDataset(data, tmp, dataFile, dataQty)); + space->UpdateParamsFromFile(*inpStateFast); + space->ReadDataset(pivots, tmp, pivotFile, pivotQty); + + unique_ptr> pivIndx(useDummyIndex ? + new DummyPivotIndex(*space, pivots) + : + space->CreatePivotIndex(pivots, + 0 /* Let's not test using the hashing trick here, b/c distances would be somewhat different */)); + + for (size_t did = 0; did < dataQty; ++did) { + vector vDst; + pivIndx->ComputePivotDistancesIndexTime(data[did], vDst); + CHECK_MSG(vDst.size() == pivotQty, "ComputePivotDistancesIndexTime returns incorrect # of elements different from the # of pivots"); + + for (size_t pid = 0; pid < pivotQty; ++pid) { + T val2 = space->IndexTimeDistance(pivots[pid], data[did]); + T val1 = vDst[pid]; + + float AbsDiff1 = fabs(val1 - val2); + float RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val2)),T(1e-18)); + + if (RelDiff1 > maxRelDiff && AbsDiff1 > maxAbsDiff) { + cerr << "Bug in fast computation of all-pivot distance, " << + " space: " << spaceName << " real pivot index?: " << !useDummyIndex << endl << + " dataFile: " << dataFile << endl << + " pivotFile: " << pivotFile << endl << + " data index: " << did << " pivot index: " << pid << endl << + " val1 = " << val1 << " val2 = " << val2 << + " Diff: " << (val1 - val2) << + " RelDiff1: " << RelDiff1 << + " AbsDiff1: " << AbsDiff1 << endl; + return false; + } + } + } + } catch (const exception& e) { + LOG(LIB_INFO) << "Got exception while testing: " << e.what(); + return false; + } + return true; +} + + + + +#ifdef DISABLE_LONG_TESTS +TEST(DISABLE_TestAgree) { +#else +TEST(TestAgree) { +#endif + int nTest = 0; + int nFail = 0; + + nTest++; + nFail += !TestSparseAngularDistanceAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseAngularDistanceAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseCosineSimilarityAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseCosineSimilarityAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); + + + nTest++; + nFail += !TestSparseNegativeScalarProductAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseNegativeScalarProductAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseQueryNormNegativeScalarProductAgree(sampleDataPrefix + "sparse_5K.txt", 1000, 200); + + nTest++; + nFail += !TestSparseQueryNormNegativeScalarProductAgree(sampleDataPrefix + "sparse_wiki_5K.txt", 1000, 200); + + + /* + * 32 should be more than enough for almost all methods, + * where loop-unrolling includes at most 16 distance computations. + * + * Bit-Hamming is an exception. + * + */ + for (unsigned dim = 1; dim <= 1024; dim+=2) { + LOG(LIB_INFO) << "Dim = " << dim; + + nFail += !TestBitHammingAgree(1000, dim, 1000); + } + + for (unsigned dim = 1; dim <= 32; ++dim) { + LOG(LIB_INFO) << "Dim = " << dim; + + /* + * This is a costly check, we don't need to do it for large # dimensions. + * Anyways, the function is not using any loop unrolling, so 8 should be sufficient. + */ + if (dim <= 8) { + + for (float power = 0.125; power <= 32; power += 0.125) { + TestLPGenericAgree(1024, dim, 10, power); + } + for (double power = 0.125; power <= 32; power += 0.125) { + TestLPGenericAgree(1024, dim, 10, power); + } + + // In the case of Renyi divergence 0 < alpha < 1, 1 < alpha < infinity + // https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy#R%C3%A9nyi_divergence + for (float alpha = 0.125; alpha <= 2; alpha += 0.125) { + if (fabs(alpha - 1) < 1e-6) continue; + TestRenyiDivAgree(1024, dim, 10, alpha); + } + for (double alpha = 0.125; alpha <= 2; alpha += 0.125) { + if (fabs(alpha - 1) < 1e-6) continue; + TestRenyiDivAgree(1024, dim, 10, alpha); + } + + for (float alpha = -2; alpha <= 2; alpha += 0.5) + for (float beta = -2; beta <= 2; beta += 0.5) + { + TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); + } + + for (double alpha = -2; alpha <= 2; alpha += 0.5) + for (double beta = -2; beta <= 2; beta += 0.5) + { + TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); + } + } + + nTest++; + nFail += !TestNormScalarProductAgree(1024, dim, 10); + nTest++; + nFail += !TestNormScalarProductAgree(1024, dim, 10); + + nTest++; + nFail += !TestScalarProductAgree(1024, dim, 10); + nTest++; + nFail += !TestScalarProductAgree(1024, dim, 10); + + nTest++; + nFail += !TestSpearmanFootruleAgree(1024, dim, 10); + + nTest++; + nFail += !TestSpearmanRhoAgree(1024, dim, 10); + + nTest++; + nFail += !TestJSAgree(1024, dim, 10, 0.5); + nTest++; + nFail += !TestJSAgree(1024, dim, 10, 0.5); + + nTest++; + nFail += !TestKLGeneralAgree(1024, dim, 10); + nTest++; + nFail += !TestKLGeneralAgree(1024, dim, 10); + + nTest++; + nFail += !TestLInfAgree(1024, dim, 10); + nTest++; + nFail += !TestLInfAgree(1024, dim, 10); + + nTest++; + nFail += !TestL1Agree(1024, dim, 10); + nTest++; + nFail += !TestL1Agree(1024, dim, 10); + + nTest++; + nFail += !TestL2Agree(1024, dim, 10); + nTest++; + nFail += !TestL2Agree(1024, dim, 10); + + nTest++; + nFail += !TestKLAgree(1024, dim, 10); + nTest++; + nFail += !TestKLAgree(1024, dim, 10); + + nTest++; + nFail += !TestItakuraSaitoAgree(1024, dim, 10); + nTest++; + nFail += !TestItakuraSaitoAgree(1024, dim, 10); + } + + LOG(LIB_INFO) << nTest << " (sub) tests performed " << nFail << " failed"; + + EXPECT_EQ(0, nFail); +} + +#ifdef DISABLE_LONG_TESTS +TEST(DISABLE_TestAgreePivotIndex) { +#else +TEST(TestAgreePivotIndex) { +#endif + int nTest = 0; + int nFail = 0; + + const size_t dataQty = 1000; + const size_t pivotQty = 100; + + vector vDataFiles = {"sparse_5K.txt", "sparse_wiki_5K.txt"}; + vector vSpaces = {SPACE_SPARSE_COSINE_SIMILARITY_FAST, SPACE_SPARSE_ANGULAR_DISTANCE_FAST, + SPACE_SPARSE_NEGATIVE_SCALAR_FAST, SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR_FAST}; + const string pivotFile = "sparse_pivots1K_termQty5K_maxId_100K.txt"; + + for (string spaceName : vSpaces) + for (string dataFile : vDataFiles) { + // 1. test with a dummy pivot index + nTest++; + nFail += !TestPivotIndex(spaceName, true, sampleDataPrefix + dataFile, dataQty, sampleDataPrefix + pivotFile, pivotQty); + + // 2. test with a real pivot index + nTest++; + nFail += !TestPivotIndex(spaceName, false, sampleDataPrefix + dataFile, dataQty, sampleDataPrefix + pivotFile, pivotQty); + } + + LOG(LIB_INFO) << nTest << " (sub) tests performed " << nFail << " failed"; + + EXPECT_EQ(0, nFail); +} + + } // namespace similarity -// + diff --git a/similarity_search/test/test_space_serial.cc b/similarity_search/test/test_space_serial.cc index 5dd36b7..17a5c6b 100644 --- a/similarity_search/test/test_space_serial.cc +++ b/similarity_search/test/test_space_serial.cc @@ -135,9 +135,6 @@ bool fullTest(const vector& dataSetStr, size_t maxNumRec, const string& dataSet1.push_back(space->CreateObjFromStr(id++, -1, s, NULL).release()); vExternIds1.push_back(ss.str()); -// std::cout << space->CreateStrFromObj(dataSet1[dataSet1.size() - 1], NULL) << std::endl; - std::cout << s << std::endl; - if (id >= maxNumRec) break; } @@ -153,53 +150,53 @@ const char *emptyParams[] = {NULL}; const char *paramsDistL2[] = {"dist=" SPACE_WORD_EMBED_DIST_L2, NULL}; const char *paramsDistCosine[] = {"dist=" SPACE_WORD_EMBED_DIST_COSINE, NULL}; -//TEST(Test_WordEmbedSpace) { -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt", SPACE_WORD_EMBED, paramsDistL2, true)); -// EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt",SPACE_WORD_EMBED, paramsDistCosine, true)); -// EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt", SPACE_WORD_EMBED, paramsDistL2, true)); -// EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt",SPACE_WORD_EMBED, paramsDistCosine, true)); -// } -//} -// -//TEST(Test_DenseVectorSpace) { -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "l2", emptyParams, false)); -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "l2", emptyParams, false)); -// } -//} -// -//TEST(Test_DenseVectorKLDiv) { -// // Test KL-diverg. with and without precomputation of logarithms -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenfast", emptyParams, false)); -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenfast", emptyParams, false)); -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenslow", emptyParams, false)); -// EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenslow", emptyParams, false)); -// } -//} -// -//TEST(Test_SparseVectorSpace) { -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse", emptyParams, false)); -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse", emptyParams, false)); -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse", emptyParams, false)); -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse", emptyParams, false)); -// } -//} -// -//TEST(Test_SparseVectorSpaceFast) { -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse_fast", emptyParams, false)); -// EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse_fast", emptyParams, false)); -// } -//} -// -//TEST(Test_StringSpace) { -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("dna32_4_5K.txt", maxNumRec, "tmp_out_file.txt", "leven", emptyParams, false)); -// } -//} +TEST(Test_WordEmbedSpace) { + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt", SPACE_WORD_EMBED, paramsDistL2, true)); + EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt",SPACE_WORD_EMBED, paramsDistCosine, true)); + EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt", SPACE_WORD_EMBED, paramsDistL2, true)); + EXPECT_EQ(true, fullTest("glove.6B.100d_100.txt", maxNumRec, "tmp_out_file.txt",SPACE_WORD_EMBED, paramsDistCosine, true)); + } +} + +TEST(Test_DenseVectorSpace) { + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "l2", emptyParams, false)); + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "l2", emptyParams, false)); + } +} + +TEST(Test_DenseVectorKLDiv) { + // Test KL-diverg. with and without precomputation of logarithms + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenfast", emptyParams, false)); + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenfast", emptyParams, false)); + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenslow", emptyParams, false)); + EXPECT_EQ(true, fullTest("final128_10K.txt", maxNumRec, "tmp_out_file.txt", "kldivgenslow", emptyParams, false)); + } +} + +TEST(Test_SparseVectorSpace) { + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse", emptyParams, false)); + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse", emptyParams, false)); + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse", emptyParams, false)); + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse", emptyParams, false)); + } +} + +TEST(Test_SparseVectorSpaceFast) { + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "cosinesimil_sparse_fast", emptyParams, false)); + EXPECT_EQ(true, fullTest("sparse_5K.txt", maxNumRec, "tmp_out_file.txt", "angulardist_sparse_fast", emptyParams, false)); + } +} + +TEST(Test_StringSpace) { + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("dna32_4_5K.txt", maxNumRec, "tmp_out_file.txt", "leven", emptyParams, false)); + } +} TEST(Test_BitHamming) { vector testVect; @@ -235,15 +232,15 @@ TEST(Test_BitJaccard) { } } -//#if defined(WITH_EXTRAS) -//TEST(Test_SQFD) { -// const char* sqfdParams[] = {"alpha=1", NULL} ; -// for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { -// EXPECT_EQ(true, fullTest("sqfd20_10k_10k.txt", maxNumRec, "tmp_out_file.txt", "sqfd_heuristic_func", sqfdParams, false)); -// EXPECT_EQ(true, fullTest("sqfd20_10k_10k.txt", maxNumRec, "tmp_out_file.txt", "sqfd_heuristic_func", sqfdParams, false)); -// } -//} -//#endif +#if defined(WITH_EXTRAS) +TEST(Test_SQFD) { + const char* sqfdParams[] = {"alpha=1", NULL} ; + for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { + EXPECT_EQ(true, fullTest("sqfd20_10k_10k.txt", maxNumRec, "tmp_out_file.txt", "sqfd_heuristic_func", sqfdParams, false)); + EXPECT_EQ(true, fullTest("sqfd20_10k_10k.txt", maxNumRec, "tmp_out_file.txt", "sqfd_heuristic_func", sqfdParams, false)); + } +} +#endif }