From e4d3084bf1cc24a6cc2a2e4690aaaea591dcc0a3 Mon Sep 17 00:00:00 2001 From: searchivairus Date: Mon, 15 Jan 2018 19:21:23 -0500 Subject: [PATCH] A fix for #272 --- similarity_search/include/distcomp.h | 10 ++- .../include/factory/init_spaces.h | 6 +- .../factory/space/space_renyi_diverg.h | 18 ++++- similarity_search/include/pow.h | 15 ++-- .../include/space/space_renyi_diverg.h | 31 ++++++-- .../include/space/space_sparse_scalar_fast.h | 16 ++-- similarity_search/src/distcomp_diverg.cc | 73 +++++++++++++++---- .../src/space/space_ab_diverg.cc | 4 +- .../src/space/space_renyi_diverg.cc | 33 +++++++-- similarity_search/test/test_distfunc.cc | 62 +++++++++++++++- similarity_search/test/test_pow.cc | 5 +- 11 files changed, 220 insertions(+), 53 deletions(-) diff --git a/similarity_search/include/distcomp.h b/similarity_search/include/distcomp.h index fa02f28..8fe78bb 100644 --- a/similarity_search/include/distcomp.h +++ b/similarity_search/include/distcomp.h @@ -198,15 +198,19 @@ template T LPGenericDistanceOptim(const T* x, const T* y, const int * estimators for image classification”. In: Computer Vision and Pattern Recognition (CVPR), 2012 IEEE * Conference on, pages 2989–2996 */ -template T alpha_beta_divergence(const T* x, const T* y, const int length, float alpha, float beta); +template T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta); +template T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta); + // A proxy function for alpha-beta divergence that may be used during indexing -template T alpha_beta_divergence_proxy(const T* x, const T* y, const int length, float alpha, float beta); +template T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta); +template T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta); /* * Renyi divergence. * Rényi, Alfréd (1961). "On measures of information and entropy". * Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability 1960. pp. 547–561. */ -template T renyi_divergence(const T* x, const T* y, const int length, float alpha); +template T renyiDivergence(const T *x, const T *y, const int length, float alpha); +template T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha); /* diff --git a/similarity_search/include/factory/init_spaces.h b/similarity_search/include/factory/init_spaces.h index c3e2276..2f9f445 100644 --- a/similarity_search/include/factory/init_spaces.h +++ b/similarity_search/include/factory/init_spaces.h @@ -141,8 +141,10 @@ inline void initSpaces() { REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG, CreateAlphaBetaDiverg) REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG, CreateAlphaBetaDiverg) - REGISTER_SPACE_CREATOR(float, SPACE_RENYI_DIVERG, CreateRenyiDiverg) - REGISTER_SPACE_CREATOR(double, SPACE_RENYI_DIVERG, CreateRenyiDiverg) + REGISTER_SPACE_CREATOR(float, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow) + REGISTER_SPACE_CREATOR(double, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow) + REGISTER_SPACE_CREATOR(float, SPACE_RENYI_DIVERG_FAST, CreateRenyiDivergFast) + REGISTER_SPACE_CREATOR(double, SPACE_RENYI_DIVERG_FAST, CreateRenyiDivergFast) } diff --git a/similarity_search/include/factory/space/space_renyi_diverg.h b/similarity_search/include/factory/space/space_renyi_diverg.h index 21589c9..469bf8b 100644 --- a/similarity_search/include/factory/space/space_renyi_diverg.h +++ b/similarity_search/include/factory/space/space_renyi_diverg.h @@ -25,7 +25,7 @@ namespace similarity { template -Space* CreateRenyiDiverg(const AnyParams& AllParams) { +Space* CreateRenyiDivergSlow(const AnyParams& AllParams) { AnyParamManager pmgr(AllParams); float alpha = 0.5; @@ -35,7 +35,21 @@ Space* CreateRenyiDiverg(const AnyParams& AllParams) { CHECK_MSG(std::fabs(alpha - 1) > 2*std::numeric_limits::min() && alpha > 0, "alpha should be > 0 and != 1"); - return new SpaceRenyiDiverg(alpha); + return new SpaceRenyiDivergSlow(alpha); +} + +template +Space* CreateRenyiDivergFast(const AnyParams& AllParams) { + AnyParamManager pmgr(AllParams); + + float alpha = 0.5; + + pmgr.GetParamOptional("alpha", alpha, alpha); + + CHECK_MSG(std::fabs(alpha - 1) > 2*std::numeric_limits::min() && alpha > 0, + "alpha should be > 0 and != 1"); + + return new SpaceRenyiDivergFast(alpha); } /* diff --git a/similarity_search/include/pow.h b/similarity_search/include/pow.h index 4d5cfc9..b42f8db 100644 --- a/similarity_search/include/pow.h +++ b/similarity_search/include/pow.h @@ -204,12 +204,12 @@ class PowerProxyObject { * @param maxDig a maximum number of binary digits to consider (should be <= 31). */ PowerProxyObject(const T p, const unsigned maxDig = 18) { - CHECK_MSG(p >= 0, "The exponent should be non-negative"); - + pOrig_ = p; + isNeg_ = p < 0; + p_ = (isNeg_) ? -p : p; maxK_ = 1u << maxDig; - unsigned pfm = static_cast(std::floor(maxK_ * p)); + unsigned pfm = static_cast(std::floor(maxK_ * p_)); - p_ = p; isOptim_ = (fabs(maxK_*p_ - pfm) <= 2 * std::numeric_limits::min()); intPow_ = pfm >> maxDig; fractPow_ = pfm - (intPow_ << maxDig); @@ -220,16 +220,19 @@ class PowerProxyObject { * Compute pow(base, p_) possibly efficiently. We expect base to be * non-negative! */ - T inline pow(const T base) { + T inline pow(T base) { if (isOptim_) { + if (isNeg_) base = 1/base; // Negative power T mult1 = intPow_ ? EfficientPow(base, intPow_) : 1; T mult2 = fractPow_ ? EfficientFractPowUtil(base, fractPow_, maxK_) : 1; return mult1 * mult2; } else { - return std::pow(base, p_); + return std::pow(base, pOrig_); } } private: + bool isNeg_; + T pOrig_; T p_; unsigned maxK_; bool isOptim_; diff --git a/similarity_search/include/space/space_renyi_diverg.h b/similarity_search/include/space/space_renyi_diverg.h index 1f3c964..ff983b5 100644 --- a/similarity_search/include/space/space_renyi_diverg.h +++ b/similarity_search/include/space/space_renyi_diverg.h @@ -30,28 +30,43 @@ #include "space_vector.h" #include "distcomp.h" -#define SPACE_RENYI_DIVERG "renyi_diverg" +#define SPACE_RENYI_DIVERG_SLOW "renyidiv_slow" +#define SPACE_RENYI_DIVERG_FAST "renyidiv_fast" namespace similarity { /* - * Renyi divergence. + * Renyi divergences. * */ template -class SpaceRenyiDiverg : public VectorSpaceSimpleStorage { +class SpaceRenyiDivergSlow : public VectorSpaceSimpleStorage { public: - explicit SpaceRenyiDiverg(float alpha) : alpha_(alpha) {} - virtual ~SpaceRenyiDiverg() {} + explicit SpaceRenyiDivergSlow(float alpha) : alpha_(alpha) {} + virtual ~SpaceRenyiDivergSlow() {} - virtual std::string StrDesc() const; + virtual std::string StrDesc() const override; protected: - virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const; + virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override; private: - DISABLE_COPY_AND_ASSIGN(SpaceRenyiDiverg); + DISABLE_COPY_AND_ASSIGN(SpaceRenyiDivergSlow); + float alpha_; +}; + +template +class SpaceRenyiDivergFast : public VectorSpaceSimpleStorage { + public: + explicit SpaceRenyiDivergFast(float alpha) : alpha_(alpha) {} + virtual ~SpaceRenyiDivergFast() {} + + virtual std::string StrDesc() const override; + protected: + virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override; + private: + DISABLE_COPY_AND_ASSIGN(SpaceRenyiDivergFast); float alpha_; }; diff --git a/similarity_search/include/space/space_sparse_scalar_fast.h b/similarity_search/include/space/space_sparse_scalar_fast.h index b0b1e25..23b2eb8 100644 --- a/similarity_search/include/space/space_sparse_scalar_fast.h +++ b/similarity_search/include/space/space_sparse_scalar_fast.h @@ -75,14 +75,14 @@ class SpaceDotProdPivotIndexBase: public PivotIndex { class SpaceSparseCosineSimilarityFast : public SpaceSparseVectorInter { public: explicit SpaceSparseCosineSimilarityFast(){} - virtual std::string StrDesc() const { + virtual std::string StrDesc() const override { return SPACE_SPARSE_COSINE_SIMILARITY_FAST; } virtual PivotIndex* CreatePivotIndex(const ObjectVector& pivots, size_t hashTrickDim = 0) const override { return new PivotIndexLocal(*this, pivots, hashTrickDim); } protected: - virtual float HiddenDistance(const Object* obj1, const Object* obj2) const; + virtual float HiddenDistance(const Object* obj1, const Object* obj2) const override; class PivotIndexLocal : public SpaceDotProdPivotIndexBase { public: @@ -102,7 +102,7 @@ class SpaceSparseCosineSimilarityFast : public SpaceSparseVectorInter { class SpaceSparseAngularDistanceFast : public SpaceSparseVectorInter { public: explicit SpaceSparseAngularDistanceFast(){} - virtual std::string StrDesc() const { + virtual std::string StrDesc() const override { return SPACE_SPARSE_ANGULAR_DISTANCE_FAST; } @@ -110,7 +110,7 @@ class SpaceSparseAngularDistanceFast : public SpaceSparseVectorInter { return new PivotIndexLocal(*this, pivots, hashTrickDim); } protected: - virtual float HiddenDistance(const Object* obj1, const Object* obj2) const; + virtual float HiddenDistance(const Object* obj1, const Object* obj2) const override; class PivotIndexLocal : public SpaceDotProdPivotIndexBase { public: @@ -131,7 +131,7 @@ class SpaceSparseAngularDistanceFast : public SpaceSparseVectorInter { class SpaceSparseNegativeScalarProductFast : public SpaceSparseVectorInter { public: explicit SpaceSparseNegativeScalarProductFast(){} - virtual std::string StrDesc() const { + virtual std::string StrDesc() const override { return SPACE_SPARSE_NEGATIVE_SCALAR_FAST; } @@ -139,7 +139,7 @@ class SpaceSparseNegativeScalarProductFast : public SpaceSparseVectorInter { public: explicit SpaceSparseQueryNormNegativeScalarProductFast(){} - virtual std::string StrDesc() const { + virtual std::string StrDesc() const override { return SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR_FAST; } @@ -167,7 +167,7 @@ class SpaceSparseQueryNormNegativeScalarProductFast : public SpaceSparseVectorIn return new PivotIndexLocal(*this, pivots, hashTrickDim); } protected: - virtual float HiddenDistance(const Object* obj1, const Object* obj2) const; + virtual float HiddenDistance(const Object* obj1, const Object* obj2) const override; class PivotIndexLocal : public SpaceDotProdPivotIndexBase { public: diff --git a/similarity_search/src/distcomp_diverg.cc b/similarity_search/src/distcomp_diverg.cc index 5cfaebe..a8320ff 100644 --- a/similarity_search/src/distcomp_diverg.cc +++ b/similarity_search/src/distcomp_diverg.cc @@ -22,37 +22,65 @@ #include "distcomp.h" #include "logging.h" #include "utils.h" +#include "pow.h" namespace similarity { using namespace std; -template T alpha_beta_divergence(const T* x, const T* y, const int length, float alpha, float beta) { +template T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta) { T res = 0; - float alpha1 = alpha + 1; + float alphaPlus1 = alpha + 1; for (int i = 0; i < length; ++i) { - res += pow(x[i], alpha1)*pow(y[i], beta); + res += pow(x[i], alphaPlus1)*pow(y[i], beta); } return res; } -template float alpha_beta_divergence(const float* x, const float* y, const int length, float alpha, float beta); -template double alpha_beta_divergence(const double* x, const double* y, const int length, float alpha, float beta); +template float alphaBetaDivergence(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergence(const double* x, const double* y, const int length, float alpha, float beta); -template T alpha_beta_divergence_proxy(const T* x, const T* y, const int length, float alpha, float beta) { +template T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta) { T res = 0; - float alpha1 = alpha + 1; + PowerProxyObject powalphaPlus1(alpha + 1), powBeta(beta); + + for (int i = 0; i < length; ++i) { + res += powalphaPlus1.pow(x[i])*powBeta.pow(y[i]); + } + return res; +} + +template float alphaBetaDivergenceFast(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergenceFast(const double* x, const double* y, const int length, float alpha, float beta); + +template T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta) { + T res = 0; + float alphaPlus1 = alpha + 1; + for (int i = 0; i < length; ++i) { + res += (pow(x[i], alphaPlus1)*pow(y[i], beta) + + pow(y[i], alphaPlus1)*pow(x[i], beta)) * 0.5; + } + return res; +} + +template float alphaBetaDivergenceProxy(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergenceProxy(const double* x, const double* y, const int length, float alpha, float beta); + +template T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta) { + PowerProxyObject powalphaPlus1(alpha + 1), powBeta(beta); + T res = 0; + for (int i = 0; i < length; ++i) { - res += (pow(x[i], alpha1)*pow(y[i], beta) + - pow(y[i], alpha1)*pow(x[i], beta)) * 0.5; + res += powalphaPlus1.pow(x[i])* powBeta.pow(y[i]) + + powalphaPlus1.pow(y[i])* powBeta.pow(x[i]) * 0.5; } return res; } -template float alpha_beta_divergence_proxy(const float* x, const float* y, const int length, float alpha, float beta); -template double alpha_beta_divergence_proxy(const double* x, const double* y, const int length, float alpha, float beta); +template float alphaBetaDivergenceFastProxy(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergenceFastProxy(const double* x, const double* y, const int length, float alpha, float beta); -template T renyi_divergence(const T* x, const T* y, const int length, float alpha) { +template T renyiDivergence(const T *x, const T *y, const int length, float alpha) { T sum = 0; T eps = -1e-6; float t = alpha-1; @@ -65,7 +93,24 @@ template T renyi_divergence(const T* x, const T* y, const int lengt return max(0, res); } -template float renyi_divergence(const float* x, const float* y, const int length, float alpha); -template double renyi_divergence(const double* x, const double* y, const int length, float alpha); +template float renyiDivergence(const float* x, const float* y, const int length, float alpha); +template double renyiDivergence(const double* x, const double* y, const int length, float alpha); + +template T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha) { + float t = alpha-1; + PowerProxyObject powAlphaMinusOne(t); + T sum = 0; + T eps = -1e-6; + for (int i = 0; i < length; ++i) { + sum += x[i]*powAlphaMinusOne.pow(x[i]/y[i]); + } + float res = 1/t * log(sum); + CHECK_MSG(res >= eps, "Expected a non-negative result, but got " + ConvertToString(res) + " for alpha=" + ConvertToString(alpha)); + // Might be slightly negative due to rounding errors + return max(0, res); +} + +template float renyiDivergenceFast(const float* x, const float* y, const int length, float alpha); +template double renyiDivergenceFast(const double* x, const double* y, const int length, float alpha); } diff --git a/similarity_search/src/space/space_ab_diverg.cc b/similarity_search/src/space/space_ab_diverg.cc index 3e8b82a..82984f6 100644 --- a/similarity_search/src/space/space_ab_diverg.cc +++ b/similarity_search/src/space/space_ab_diverg.cc @@ -33,7 +33,7 @@ dist_t SpaceAlphaBetaDiverg::HiddenDistance(const Object* obj1, const Ob const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return alpha_beta_divergence(x, y, length, alpha_, beta_); + return alphaBetaDivergence(x, y, length, alpha_, beta_); } template @@ -44,7 +44,7 @@ dist_t SpaceAlphaBetaDiverg::ProxyDistance(const Object* obj1, const Obj const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return alpha_beta_divergence_proxy(x, y, length, alpha_, beta_); + return alphaBetaDivergenceProxy(x, y, length, alpha_, beta_); } template diff --git a/similarity_search/src/space/space_renyi_diverg.cc b/similarity_search/src/space/space_renyi_diverg.cc index 344161e..32e2067 100644 --- a/similarity_search/src/space/space_renyi_diverg.cc +++ b/similarity_search/src/space/space_renyi_diverg.cc @@ -26,24 +26,45 @@ namespace similarity { template -dist_t SpaceRenyiDiverg::HiddenDistance(const Object* obj1, const Object* obj2) const { +dist_t SpaceRenyiDivergSlow::HiddenDistance(const Object* obj1, const Object* obj2) const { CHECK(obj1->datalength() > 0); CHECK(obj1->datalength() == obj2->datalength()); const dist_t* x = reinterpret_cast(obj1->data()); const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return renyi_divergence(x, y, length, alpha_); + return renyiDivergence(x, y, length, alpha_); } template -std::string SpaceRenyiDiverg::StrDesc() const { +std::string SpaceRenyiDivergSlow::StrDesc() const { std::stringstream stream; - stream << SPACE_RENYI_DIVERG << ":alpha=" << alpha_; + stream << SPACE_RENYI_DIVERG_SLOW << ":alpha=" << alpha_; return stream.str(); } -template class SpaceRenyiDiverg; -template class SpaceRenyiDiverg; +template class SpaceRenyiDivergSlow; +template class SpaceRenyiDivergSlow; + +template +dist_t SpaceRenyiDivergFast::HiddenDistance(const Object* obj1, const Object* obj2) const { + CHECK(obj1->datalength() > 0); + CHECK(obj1->datalength() == obj2->datalength()); + const dist_t* x = reinterpret_cast(obj1->data()); + const dist_t* y = reinterpret_cast(obj2->data()); + const size_t length = obj1->datalength() / sizeof(dist_t); + + return renyiDivergenceFast(x, y, length, alpha_); +} + +template +std::string SpaceRenyiDivergFast::StrDesc() const { + std::stringstream stream; + stream << SPACE_RENYI_DIVERG_FAST << ":alpha=" << alpha_; + return stream.str(); +} + +template class SpaceRenyiDivergFast; +template class SpaceRenyiDivergFast; } // namespace similarity diff --git a/similarity_search/test/test_distfunc.cc b/similarity_search/test/test_distfunc.cc index 0cb4698..6ddde93 100644 --- a/similarity_search/test/test_distfunc.cc +++ b/similarity_search/test/test_distfunc.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -556,6 +557,54 @@ bool TestJSAgree(size_t N, size_t dim, size_t Rep, double pZero) { return true; } +template +bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + T Dist = 0; + T Error = 0; + T TotalQty = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + Normalize(pVect1, dim); + Normalize(pVect2, dim); + + T val0 = renyiDivergence(pVect1, pVect2, dim, alpha); + T val1 = renyiDivergenceFast(pVect1, pVect2, dim, alpha); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + Error += AbsDiff1; + ++TotalQty; + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug Reniy Div. (1) " << typeid(T).name() << " !!! Dim = " << dim + << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 + << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 + << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + } + + LOG(LIB_INFO) << typeid(T).name() << " Renyi Div. approximation error: average absolute: " << Error / TotalQty << + " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + + return true; +} + bool TestSpearmanFootruleAgree(size_t N, size_t dim, size_t Rep) { vector vect1(dim), vect2(dim); PivotIdType* pVect1 = &vect1[0]; @@ -1034,10 +1083,21 @@ TEST(TestAgree) { for (float power = 0.125; power <= 32; power += 0.125) { TestLPGenericAgree(1024, dim, 10, power); } - for (double power = 0.125; power <= 32; power += 0.125) { TestLPGenericAgree(1024, dim, 10, power); } + + // In the case of Renyi divergence 0 < alpha < 1, 1 < alpha < infinity + // https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy#R%C3%A9nyi_divergence + for (float alpha = 0.125; alpha <= 2; alpha += 0.125) { + if (fabs(alpha - 1) < 1e-6) continue; + TestRenyiDivAgree(1024, dim, 10, alpha); + } + for (double alpha = 0.125; alpha <= 2; alpha += 0.125) { + if (fabs(alpha - 1) < 1e-6) continue; + TestRenyiDivAgree(1024, dim, 10, alpha); + } + } nTest++; diff --git a/similarity_search/test/test_pow.cc b/similarity_search/test/test_pow.cc index 2e9131f..c77da7b 100644 --- a/similarity_search/test/test_pow.cc +++ b/similarity_search/test/test_pow.cc @@ -32,6 +32,7 @@ const float MAX_REL_DIFF = 1e-6f; vector addExps = { 0, 0.125, 0.25, 0.5 }; vector vals = { 0.1, 0.5, 1, 1.5, 2, 4}; +vector signs = { 1, -1}; template bool runTest() { for (unsigned i = 0; i <= 128; ++i) { @@ -40,7 +41,9 @@ template bool runTest() { PowerProxyObject obj(oneExp); - for (float v : vals) { + for (float vpos : vals) + for (float s : signs) { + float v = vpos * s; T expRes = pow(T(v), oneExp); T res = obj.pow(v);