diff --git a/similarity_search/include/distcomp.h b/similarity_search/include/distcomp.h index 8fe78bb..0202206 100644 --- a/similarity_search/include/distcomp.h +++ b/similarity_search/include/distcomp.h @@ -198,18 +198,18 @@ template T LPGenericDistanceOptim(const T* x, const T* y, const int * estimators for image classification”. In: Computer Vision and Pattern Recognition (CVPR), 2012 IEEE * Conference on, pages 2989–2996 */ -template T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta); +template T alphaBetaDivergenceSlow(const T *x, const T *y, const int length, float alpha,float beta); template T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta); // A proxy function for alpha-beta divergence that may be used during indexing -template T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta); +template T alphaBetaDivergenceSlowProxy(const T *x, const T *y, const int length, float alpha, float beta); template T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta); /* * Renyi divergence. * Rényi, Alfréd (1961). "On measures of information and entropy". * Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability 1960. pp. 547–561. */ -template T renyiDivergence(const T *x, const T *y, const int length, float alpha); +template T renyiDivergenceSlow(const T *x, const T *y, const int length, float alpha); template T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha); diff --git a/similarity_search/include/factory/init_spaces.h b/similarity_search/include/factory/init_spaces.h index 2f9f445..34b676f 100644 --- a/similarity_search/include/factory/init_spaces.h +++ b/similarity_search/include/factory/init_spaces.h @@ -138,8 +138,10 @@ inline void initSpaces() { REGISTER_SPACE_CREATOR(double, SPACE_SQFD_GAUSSIAN_FUNC, CreateSqfdGaussianFunc) #endif - REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG, CreateAlphaBetaDiverg) - REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG, CreateAlphaBetaDiverg) + REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG_SLOW, CreateAlphaBetaDivergSlow) + REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG_SLOW, CreateAlphaBetaDivergSlow) + REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG_FAST, CreateAlphaBetaDivergFast) + REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG_FAST, CreateAlphaBetaDivergFast) REGISTER_SPACE_CREATOR(float, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow) REGISTER_SPACE_CREATOR(double, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow) diff --git a/similarity_search/include/factory/space/space_ab_diverg.h b/similarity_search/include/factory/space/space_ab_diverg.h index db40faa..4f77a9a 100644 --- a/similarity_search/include/factory/space/space_ab_diverg.h +++ b/similarity_search/include/factory/space/space_ab_diverg.h @@ -22,7 +22,7 @@ namespace similarity { template -Space* CreateAlphaBetaDiverg(const AnyParams& AllParams) { +Space* CreateAlphaBetaDivergSlow(const AnyParams& AllParams) { AnyParamManager pmgr(AllParams); float alpha = 1.0, beta = 1.0; @@ -30,7 +30,19 @@ Space* CreateAlphaBetaDiverg(const AnyParams& AllParams) { pmgr.GetParamOptional("alpha", alpha, alpha); pmgr.GetParamOptional("beta", beta, beta); - return new SpaceAlphaBetaDiverg(alpha, beta); + return new SpaceAlphaBetaDivergSlow(alpha, beta); +} + +template +Space* CreateAlphaBetaDivergFast(const AnyParams& AllParams) { + AnyParamManager pmgr(AllParams); + + float alpha = 1.0, beta = 1.0; + + pmgr.GetParamOptional("alpha", alpha, alpha); + pmgr.GetParamOptional("beta", beta, beta); + + return new SpaceAlphaBetaDivergFast(alpha, beta); } /* diff --git a/similarity_search/include/space/space_ab_diverg.h b/similarity_search/include/space/space_ab_diverg.h index 1a64416..f76c4c6 100644 --- a/similarity_search/include/space/space_ab_diverg.h +++ b/similarity_search/include/space/space_ab_diverg.h @@ -30,7 +30,8 @@ #include "space_vector.h" #include "distcomp.h" -#define SPACE_AB_DIVERG "ab_diverg" +#define SPACE_AB_DIVERG_SLOW "abdiv_slow" +#define SPACE_AB_DIVERG_FAST "abdiv_fast" namespace similarity { @@ -45,17 +46,32 @@ namespace similarity { */ template -class SpaceAlphaBetaDiverg : public VectorSpaceSimpleStorage { +class SpaceAlphaBetaDivergSlow : public VectorSpaceSimpleStorage { public: - explicit SpaceAlphaBetaDiverg(float alpha, float beta) : alpha_(alpha), beta_(beta) {} - virtual ~SpaceAlphaBetaDiverg() {} + explicit SpaceAlphaBetaDivergSlow(float alpha, float beta) : alpha_(alpha), beta_(beta) {} + virtual ~SpaceAlphaBetaDivergSlow() {} virtual std::string StrDesc() const override; protected: virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override; virtual dist_t ProxyDistance(const Object* obj1, const Object* obj2) const override; private: - DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDiverg); + DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDivergSlow); + float alpha_, beta_; +}; + +template +class SpaceAlphaBetaDivergFast : public VectorSpaceSimpleStorage { + public: + explicit SpaceAlphaBetaDivergFast(float alpha, float beta) : alpha_(alpha), beta_(beta) {} + virtual ~SpaceAlphaBetaDivergFast() {} + + virtual std::string StrDesc() const override; + protected: + virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override; + virtual dist_t ProxyDistance(const Object* obj1, const Object* obj2) const override; + private: + DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDivergFast); float alpha_, beta_; }; diff --git a/similarity_search/src/distcomp_diverg.cc b/similarity_search/src/distcomp_diverg.cc index a8320ff..82b56b0 100644 --- a/similarity_search/src/distcomp_diverg.cc +++ b/similarity_search/src/distcomp_diverg.cc @@ -28,7 +28,9 @@ namespace similarity { using namespace std; -template T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta) { +template T alphaBetaDivergenceSlow(const T *x, const T *y, + const int length, float alpha, + float beta) { T res = 0; float alphaPlus1 = alpha + 1; for (int i = 0; i < length; ++i) { @@ -37,8 +39,8 @@ template T alphaBetaDivergence(const T *x, const T *y, const int le return res; } -template float alphaBetaDivergence(const float* x, const float* y, const int length, float alpha, float beta); -template double alphaBetaDivergence(const double* x, const double* y, const int length, float alpha, float beta); +template float alphaBetaDivergenceSlow(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergenceSlow(const double* x, const double* y, const int length, float alpha, float beta); template T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta) { T res = 0; @@ -53,7 +55,9 @@ template T alphaBetaDivergenceFast(const T *x, const T *y, const in template float alphaBetaDivergenceFast(const float* x, const float* y, const int length, float alpha, float beta); template double alphaBetaDivergenceFast(const double* x, const double* y, const int length, float alpha, float beta); -template T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta) { +template T alphaBetaDivergenceSlowProxy(const T *x, const T *y, + const int length, + float alpha, float beta) { T res = 0; float alphaPlus1 = alpha + 1; for (int i = 0; i < length; ++i) { @@ -63,8 +67,8 @@ template T alphaBetaDivergenceProxy(const T *x, const T *y, const i return res; } -template float alphaBetaDivergenceProxy(const float* x, const float* y, const int length, float alpha, float beta); -template double alphaBetaDivergenceProxy(const double* x, const double* y, const int length, float alpha, float beta); +template float alphaBetaDivergenceSlowProxy(const float* x, const float* y, const int length, float alpha, float beta); +template double alphaBetaDivergenceSlowProxy(const double* x, const double* y, const int length, float alpha, float beta); template T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta) { PowerProxyObject powalphaPlus1(alpha + 1), powBeta(beta); @@ -80,7 +84,8 @@ template T alphaBetaDivergenceFastProxy(const T *x, const T *y, con template float alphaBetaDivergenceFastProxy(const float* x, const float* y, const int length, float alpha, float beta); template double alphaBetaDivergenceFastProxy(const double* x, const double* y, const int length, float alpha, float beta); -template T renyiDivergence(const T *x, const T *y, const int length, float alpha) { +template T renyiDivergenceSlow(const T *x, const T *y, + const int length, float alpha) { T sum = 0; T eps = -1e-6; float t = alpha-1; @@ -93,8 +98,8 @@ template T renyiDivergence(const T *x, const T *y, const int length return max(0, res); } -template float renyiDivergence(const float* x, const float* y, const int length, float alpha); -template double renyiDivergence(const double* x, const double* y, const int length, float alpha); +template float renyiDivergenceSlow(const float* x, const float* y, const int length, float alpha); +template double renyiDivergenceSlow(const double* x, const double* y, const int length, float alpha); template T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha) { float t = alpha-1; diff --git a/similarity_search/src/space/space_ab_diverg.cc b/similarity_search/src/space/space_ab_diverg.cc index 82984f6..a50ec1d 100644 --- a/similarity_search/src/space/space_ab_diverg.cc +++ b/similarity_search/src/space/space_ab_diverg.cc @@ -26,35 +26,67 @@ namespace similarity { template -dist_t SpaceAlphaBetaDiverg::HiddenDistance(const Object* obj1, const Object* obj2) const { +dist_t SpaceAlphaBetaDivergSlow::HiddenDistance(const Object* obj1, const Object* obj2) const { CHECK(obj1->datalength() > 0); CHECK(obj1->datalength() == obj2->datalength()); const dist_t* x = reinterpret_cast(obj1->data()); const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return alphaBetaDivergence(x, y, length, alpha_, beta_); + return alphaBetaDivergenceSlow(x, y, length, alpha_, beta_); } template -dist_t SpaceAlphaBetaDiverg::ProxyDistance(const Object* obj1, const Object* obj2) const { +dist_t SpaceAlphaBetaDivergSlow::ProxyDistance(const Object* obj1, const Object* obj2) const { CHECK(obj1->datalength() > 0); CHECK(obj1->datalength() == obj2->datalength()); const dist_t* x = reinterpret_cast(obj1->data()); const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return alphaBetaDivergenceProxy(x, y, length, alpha_, beta_); + return alphaBetaDivergenceSlowProxy(x, y, length, alpha_, beta_); } template -std::string SpaceAlphaBetaDiverg::StrDesc() const { +std::string SpaceAlphaBetaDivergSlow::StrDesc() const { std::stringstream stream; - stream << SPACE_AB_DIVERG << ":alpha=" << alpha_ << ",beta=" << beta_; + stream << SPACE_AB_DIVERG_SLOW << ":alpha=" << alpha_ << ",beta=" << beta_; return stream.str(); } -template class SpaceAlphaBetaDiverg; -template class SpaceAlphaBetaDiverg; +template class SpaceAlphaBetaDivergSlow; +template class SpaceAlphaBetaDivergSlow; + +template +dist_t SpaceAlphaBetaDivergFast::HiddenDistance(const Object* obj1, const Object* obj2) const { + CHECK(obj1->datalength() > 0); + CHECK(obj1->datalength() == obj2->datalength()); + const dist_t* x = reinterpret_cast(obj1->data()); + const dist_t* y = reinterpret_cast(obj2->data()); + const size_t length = obj1->datalength() / sizeof(dist_t); + + return alphaBetaDivergenceFast(x, y, length, alpha_, beta_); +} + +template +dist_t SpaceAlphaBetaDivergFast::ProxyDistance(const Object* obj1, const Object* obj2) const { + CHECK(obj1->datalength() > 0); + CHECK(obj1->datalength() == obj2->datalength()); + const dist_t* x = reinterpret_cast(obj1->data()); + const dist_t* y = reinterpret_cast(obj2->data()); + const size_t length = obj1->datalength() / sizeof(dist_t); + + return alphaBetaDivergenceFastProxy(x, y, length, alpha_, beta_); +} + +template +std::string SpaceAlphaBetaDivergFast::StrDesc() const { + std::stringstream stream; + stream << SPACE_AB_DIVERG_FAST << ":alpha=" << alpha_ << ",beta=" << beta_; + return stream.str(); +} + +template class SpaceAlphaBetaDivergFast; +template class SpaceAlphaBetaDivergFast; } // namespace similarity diff --git a/similarity_search/src/space/space_renyi_diverg.cc b/similarity_search/src/space/space_renyi_diverg.cc index 32e2067..722009d 100644 --- a/similarity_search/src/space/space_renyi_diverg.cc +++ b/similarity_search/src/space/space_renyi_diverg.cc @@ -33,7 +33,7 @@ dist_t SpaceRenyiDivergSlow::HiddenDistance(const Object* obj1, const Ob const dist_t* y = reinterpret_cast(obj2->data()); const size_t length = obj1->datalength() / sizeof(dist_t); - return renyiDivergence(x, y, length, alpha_); + return renyiDivergenceSlow(x, y, length, alpha_); } template diff --git a/similarity_search/test/test_distfunc.cc b/similarity_search/test/test_distfunc.cc index 6ddde93..ef56c13 100644 --- a/similarity_search/test/test_distfunc.cc +++ b/similarity_search/test/test_distfunc.cc @@ -575,7 +575,7 @@ bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) { Normalize(pVect1, dim); Normalize(pVect2, dim); - T val0 = renyiDivergence(pVect1, pVect2, dim, alpha); + T val0 = renyiDivergenceSlow(pVect1, pVect2, dim, alpha); T val1 = renyiDivergenceFast(pVect1, pVect2, dim, alpha); bool bug = false; @@ -605,6 +605,53 @@ bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) { return true; } +template +bool TestAlphaBetaDivAgree(size_t N, size_t dim, size_t Rep, T alpha, T beta) { + vector vect1(dim), vect2(dim); + T* pVect1 = &vect1[0]; + T* pVect2 = &vect2[0]; + + T Dist = 0; + T Error = 0; + T TotalQty = 0; + + for (size_t i = 0; i < Rep; ++i) { + for (size_t j = 1; j < N; ++j) { + GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true); + GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true); + + Normalize(pVect1, dim); + Normalize(pVect2, dim); + + T val0 = alphaBetaDivergenceSlow(pVect1, pVect2, dim, alpha, beta); + T val1 = alphaBetaDivergenceFast(pVect1, pVect2, dim, alpha, beta); + + bool bug = false; + + T AbsDiff1 = fabs(val1 - val0); + T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18)); + + Error += AbsDiff1; + ++TotalQty; + + if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) { + cerr << "Bug alpha-beta Div. (1) " << typeid(T).name() << " !!! Dim = " << dim + << "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1 + << " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1 + << " AbsDiff1: " << AbsDiff1 << endl; + bug = true; + } + + if (bug) return false; + } + } + + LOG(LIB_INFO) << typeid(T).name() << " alpha-beta div. approximation error: average absolute: " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist; + + + return true; +} + bool TestSpearmanFootruleAgree(size_t N, size_t dim, size_t Rep) { vector vect1(dim), vect2(dim); PivotIdType* pVect1 = &vect1[0]; @@ -1098,6 +1145,17 @@ TEST(TestAgree) { TestRenyiDivAgree(1024, dim, 10, alpha); } + for (float alpha = -2; alpha <= 2; alpha += 0.5) + for (float beta = -2; beta <= 2; beta += 0.5) + { + TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); + } + + for (double alpha = -2; alpha <= 2; alpha += 0.5) + for (double beta = -2; beta <= 2; beta += 0.5) + { + TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta); + } } nTest++;