Skip to content

Commit

Permalink
Finishing implementing more efficient versions of Renyi- and ab-diver…
Browse files Browse the repository at this point in the history
…gence (for nice powers like 0.5)
  • Loading branch information
searchivairus committed Jan 16, 2018
1 parent e4d3084 commit 759b828
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 31 deletions.
6 changes: 3 additions & 3 deletions similarity_search/include/distcomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,18 +198,18 @@ template <typename T> T LPGenericDistanceOptim(const T* x, const T* y, const int
* estimators for image classification”. In: Computer Vision and Pattern Recognition (CVPR), 2012 IEEE
* Conference on, pages 2989–2996
*/
template <typename T> T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta);
template <typename T> T alphaBetaDivergenceSlow(const T *x, const T *y, const int length, float alpha,float beta);
template <typename T> T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta);

// A proxy function for alpha-beta divergence that may be used during indexing
template <typename T> T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta);
template <typename T> T alphaBetaDivergenceSlowProxy(const T *x, const T *y, const int length, float alpha, float beta);
template <typename T> T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta);
/*
* Renyi divergence.
* Rényi, Alfréd (1961). "On measures of information and entropy".
* Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability 1960. pp. 547–561.
*/
template <typename T> T renyiDivergence(const T *x, const T *y, const int length, float alpha);
template <typename T> T renyiDivergenceSlow(const T *x, const T *y, const int length, float alpha);
template <typename T> T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha);


Expand Down
6 changes: 4 additions & 2 deletions similarity_search/include/factory/init_spaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,10 @@ inline void initSpaces() {
REGISTER_SPACE_CREATOR(double, SPACE_SQFD_GAUSSIAN_FUNC, CreateSqfdGaussianFunc)
#endif

REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG, CreateAlphaBetaDiverg)
REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG, CreateAlphaBetaDiverg)
REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG_SLOW, CreateAlphaBetaDivergSlow)
REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG_SLOW, CreateAlphaBetaDivergSlow)
REGISTER_SPACE_CREATOR(float, SPACE_AB_DIVERG_FAST, CreateAlphaBetaDivergFast)
REGISTER_SPACE_CREATOR(double, SPACE_AB_DIVERG_FAST, CreateAlphaBetaDivergFast)

REGISTER_SPACE_CREATOR(float, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow)
REGISTER_SPACE_CREATOR(double, SPACE_RENYI_DIVERG_SLOW, CreateRenyiDivergSlow)
Expand Down
16 changes: 14 additions & 2 deletions similarity_search/include/factory/space/space_ab_diverg.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,27 @@
namespace similarity {

template <typename dist_t>
Space<dist_t>* CreateAlphaBetaDiverg(const AnyParams& AllParams) {
Space<dist_t>* CreateAlphaBetaDivergSlow(const AnyParams& AllParams) {
AnyParamManager pmgr(AllParams);

float alpha = 1.0, beta = 1.0;

pmgr.GetParamOptional("alpha", alpha, alpha);
pmgr.GetParamOptional("beta", beta, beta);

return new SpaceAlphaBetaDiverg<dist_t>(alpha, beta);
return new SpaceAlphaBetaDivergSlow<dist_t>(alpha, beta);
}

template <typename dist_t>
Space<dist_t>* CreateAlphaBetaDivergFast(const AnyParams& AllParams) {
AnyParamManager pmgr(AllParams);

float alpha = 1.0, beta = 1.0;

pmgr.GetParamOptional("alpha", alpha, alpha);
pmgr.GetParamOptional("beta", beta, beta);

return new SpaceAlphaBetaDivergFast<dist_t>(alpha, beta);
}

/*
Expand Down
26 changes: 21 additions & 5 deletions similarity_search/include/space/space_ab_diverg.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
#include "space_vector.h"
#include "distcomp.h"

#define SPACE_AB_DIVERG "ab_diverg"
#define SPACE_AB_DIVERG_SLOW "abdiv_slow"
#define SPACE_AB_DIVERG_FAST "abdiv_fast"

namespace similarity {

Expand All @@ -45,17 +46,32 @@ namespace similarity {
*/

template <typename dist_t>
class SpaceAlphaBetaDiverg : public VectorSpaceSimpleStorage<dist_t> {
class SpaceAlphaBetaDivergSlow : public VectorSpaceSimpleStorage<dist_t> {
public:
explicit SpaceAlphaBetaDiverg(float alpha, float beta) : alpha_(alpha), beta_(beta) {}
virtual ~SpaceAlphaBetaDiverg() {}
explicit SpaceAlphaBetaDivergSlow(float alpha, float beta) : alpha_(alpha), beta_(beta) {}
virtual ~SpaceAlphaBetaDivergSlow() {}

virtual std::string StrDesc() const override;
protected:
virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override;
virtual dist_t ProxyDistance(const Object* obj1, const Object* obj2) const override;
private:
DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDiverg);
DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDivergSlow);
float alpha_, beta_;
};

template <typename dist_t>
class SpaceAlphaBetaDivergFast : public VectorSpaceSimpleStorage<dist_t> {
public:
explicit SpaceAlphaBetaDivergFast(float alpha, float beta) : alpha_(alpha), beta_(beta) {}
virtual ~SpaceAlphaBetaDivergFast() {}

virtual std::string StrDesc() const override;
protected:
virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const override;
virtual dist_t ProxyDistance(const Object* obj1, const Object* obj2) const override;
private:
DISABLE_COPY_AND_ASSIGN(SpaceAlphaBetaDivergFast);
float alpha_, beta_;
};

Expand Down
23 changes: 14 additions & 9 deletions similarity_search/src/distcomp_diverg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ namespace similarity {

using namespace std;

template <typename T> T alphaBetaDivergence(const T *x, const T *y, const int length, float alpha, float beta) {
template <typename T> T alphaBetaDivergenceSlow(const T *x, const T *y,
const int length, float alpha,
float beta) {
T res = 0;
float alphaPlus1 = alpha + 1;
for (int i = 0; i < length; ++i) {
Expand All @@ -37,8 +39,8 @@ template <typename T> T alphaBetaDivergence(const T *x, const T *y, const int le
return res;
}

template float alphaBetaDivergence(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergence(const double* x, const double* y, const int length, float alpha, float beta);
template float alphaBetaDivergenceSlow(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergenceSlow(const double* x, const double* y, const int length, float alpha, float beta);

template <typename T> T alphaBetaDivergenceFast(const T *x, const T *y, const int length, float alpha, float beta) {
T res = 0;
Expand All @@ -53,7 +55,9 @@ template <typename T> T alphaBetaDivergenceFast(const T *x, const T *y, const in
template float alphaBetaDivergenceFast(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergenceFast(const double* x, const double* y, const int length, float alpha, float beta);

template <typename T> T alphaBetaDivergenceProxy(const T *x, const T *y, const int length, float alpha, float beta) {
template <typename T> T alphaBetaDivergenceSlowProxy(const T *x, const T *y,
const int length,
float alpha, float beta) {
T res = 0;
float alphaPlus1 = alpha + 1;
for (int i = 0; i < length; ++i) {
Expand All @@ -63,8 +67,8 @@ template <typename T> T alphaBetaDivergenceProxy(const T *x, const T *y, const i
return res;
}

template float alphaBetaDivergenceProxy(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergenceProxy(const double* x, const double* y, const int length, float alpha, float beta);
template float alphaBetaDivergenceSlowProxy(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergenceSlowProxy(const double* x, const double* y, const int length, float alpha, float beta);

template <typename T> T alphaBetaDivergenceFastProxy(const T *x, const T *y, const int length, float alpha, float beta) {
PowerProxyObject<T> powalphaPlus1(alpha + 1), powBeta(beta);
Expand All @@ -80,7 +84,8 @@ template <typename T> T alphaBetaDivergenceFastProxy(const T *x, const T *y, con
template float alphaBetaDivergenceFastProxy(const float* x, const float* y, const int length, float alpha, float beta);
template double alphaBetaDivergenceFastProxy(const double* x, const double* y, const int length, float alpha, float beta);

template <typename T> T renyiDivergence(const T *x, const T *y, const int length, float alpha) {
template <typename T> T renyiDivergenceSlow(const T *x, const T *y,
const int length, float alpha) {
T sum = 0;
T eps = -1e-6;
float t = alpha-1;
Expand All @@ -93,8 +98,8 @@ template <typename T> T renyiDivergence(const T *x, const T *y, const int length
return max<T>(0, res);
}

template float renyiDivergence(const float* x, const float* y, const int length, float alpha);
template double renyiDivergence(const double* x, const double* y, const int length, float alpha);
template float renyiDivergenceSlow(const float* x, const float* y, const int length, float alpha);
template double renyiDivergenceSlow(const double* x, const double* y, const int length, float alpha);

template <typename T> T renyiDivergenceFast(const T *x, const T *y, const int length, float alpha) {
float t = alpha-1;
Expand Down
48 changes: 40 additions & 8 deletions similarity_search/src/space/space_ab_diverg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,67 @@
namespace similarity {

template <typename dist_t>
dist_t SpaceAlphaBetaDiverg<dist_t>::HiddenDistance(const Object* obj1, const Object* obj2) const {
dist_t SpaceAlphaBetaDivergSlow<dist_t>::HiddenDistance(const Object* obj1, const Object* obj2) const {
CHECK(obj1->datalength() > 0);
CHECK(obj1->datalength() == obj2->datalength());
const dist_t* x = reinterpret_cast<const dist_t*>(obj1->data());
const dist_t* y = reinterpret_cast<const dist_t*>(obj2->data());
const size_t length = obj1->datalength() / sizeof(dist_t);

return alphaBetaDivergence(x, y, length, alpha_, beta_);
return alphaBetaDivergenceSlow(x, y, length, alpha_, beta_);
}

template <typename dist_t>
dist_t SpaceAlphaBetaDiverg<dist_t>::ProxyDistance(const Object* obj1, const Object* obj2) const {
dist_t SpaceAlphaBetaDivergSlow<dist_t>::ProxyDistance(const Object* obj1, const Object* obj2) const {
CHECK(obj1->datalength() > 0);
CHECK(obj1->datalength() == obj2->datalength());
const dist_t* x = reinterpret_cast<const dist_t*>(obj1->data());
const dist_t* y = reinterpret_cast<const dist_t*>(obj2->data());
const size_t length = obj1->datalength() / sizeof(dist_t);

return alphaBetaDivergenceProxy(x, y, length, alpha_, beta_);
return alphaBetaDivergenceSlowProxy(x, y, length, alpha_, beta_);
}

template <typename dist_t>
std::string SpaceAlphaBetaDiverg<dist_t>::StrDesc() const {
std::string SpaceAlphaBetaDivergSlow<dist_t>::StrDesc() const {
std::stringstream stream;
stream << SPACE_AB_DIVERG << ":alpha=" << alpha_ << ",beta=" << beta_;
stream << SPACE_AB_DIVERG_SLOW << ":alpha=" << alpha_ << ",beta=" << beta_;
return stream.str();
}

template class SpaceAlphaBetaDiverg<float>;
template class SpaceAlphaBetaDiverg<double>;
template class SpaceAlphaBetaDivergSlow<float>;
template class SpaceAlphaBetaDivergSlow<double>;

template <typename dist_t>
dist_t SpaceAlphaBetaDivergFast<dist_t>::HiddenDistance(const Object* obj1, const Object* obj2) const {
CHECK(obj1->datalength() > 0);
CHECK(obj1->datalength() == obj2->datalength());
const dist_t* x = reinterpret_cast<const dist_t*>(obj1->data());
const dist_t* y = reinterpret_cast<const dist_t*>(obj2->data());
const size_t length = obj1->datalength() / sizeof(dist_t);

return alphaBetaDivergenceFast(x, y, length, alpha_, beta_);
}

template <typename dist_t>
dist_t SpaceAlphaBetaDivergFast<dist_t>::ProxyDistance(const Object* obj1, const Object* obj2) const {
CHECK(obj1->datalength() > 0);
CHECK(obj1->datalength() == obj2->datalength());
const dist_t* x = reinterpret_cast<const dist_t*>(obj1->data());
const dist_t* y = reinterpret_cast<const dist_t*>(obj2->data());
const size_t length = obj1->datalength() / sizeof(dist_t);

return alphaBetaDivergenceFastProxy(x, y, length, alpha_, beta_);
}

template <typename dist_t>
std::string SpaceAlphaBetaDivergFast<dist_t>::StrDesc() const {
std::stringstream stream;
stream << SPACE_AB_DIVERG_FAST << ":alpha=" << alpha_ << ",beta=" << beta_;
return stream.str();
}

template class SpaceAlphaBetaDivergFast<float>;
template class SpaceAlphaBetaDivergFast<double>;

} // namespace similarity
2 changes: 1 addition & 1 deletion similarity_search/src/space/space_renyi_diverg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ dist_t SpaceRenyiDivergSlow<dist_t>::HiddenDistance(const Object* obj1, const Ob
const dist_t* y = reinterpret_cast<const dist_t*>(obj2->data());
const size_t length = obj1->datalength() / sizeof(dist_t);

return renyiDivergence(x, y, length, alpha_);
return renyiDivergenceSlow(x, y, length, alpha_);
}

template <typename dist_t>
Expand Down
60 changes: 59 additions & 1 deletion similarity_search/test/test_distfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) {
Normalize(pVect1, dim);
Normalize(pVect2, dim);

T val0 = renyiDivergence(pVect1, pVect2, dim, alpha);
T val0 = renyiDivergenceSlow(pVect1, pVect2, dim, alpha);
T val1 = renyiDivergenceFast(pVect1, pVect2, dim, alpha);

bool bug = false;
Expand Down Expand Up @@ -605,6 +605,53 @@ bool TestRenyiDivAgree(size_t N, size_t dim, size_t Rep, T alpha) {
return true;
}

template <class T>
bool TestAlphaBetaDivAgree(size_t N, size_t dim, size_t Rep, T alpha, T beta) {
vector<T> vect1(dim), vect2(dim);
T* pVect1 = &vect1[0];
T* pVect2 = &vect2[0];

T Dist = 0;
T Error = 0;
T TotalQty = 0;

for (size_t i = 0; i < Rep; ++i) {
for (size_t j = 1; j < N; ++j) {
GenRandVect(pVect1, dim, T(RANGE_SMALL), T(1.0), true);
GenRandVect(pVect2, dim, T(RANGE_SMALL), T(1.0), true);

Normalize(pVect1, dim);
Normalize(pVect2, dim);

T val0 = alphaBetaDivergenceSlow(pVect1, pVect2, dim, alpha, beta);
T val1 = alphaBetaDivergenceFast(pVect1, pVect2, dim, alpha, beta);

bool bug = false;

T AbsDiff1 = fabs(val1 - val0);
T RelDiff1 = AbsDiff1/max(max(fabs(val1),fabs(val0)),T(1e-18));

Error += AbsDiff1;
++TotalQty;

if (RelDiff1 > 1e-5 && AbsDiff1 > 1e-5) {
cerr << "Bug alpha-beta Div. (1) " << typeid(T).name() << " !!! Dim = " << dim
<< "alpha=" << alpha << " val0 = " << val0 << " val1 = " << val1
<< " Diff: " << (val0 - val1) << " RelDiff1: " << RelDiff1
<< " AbsDiff1: " << AbsDiff1 << endl;
bug = true;
}

if (bug) return false;
}
}

LOG(LIB_INFO) << typeid(T).name() << " alpha-beta div. approximation error: average absolute: " << Error / TotalQty << " avg. dist: " << Dist / TotalQty << " average relative: " << Error/Dist;


return true;
}

bool TestSpearmanFootruleAgree(size_t N, size_t dim, size_t Rep) {
vector<PivotIdType> vect1(dim), vect2(dim);
PivotIdType* pVect1 = &vect1[0];
Expand Down Expand Up @@ -1098,6 +1145,17 @@ TEST(TestAgree) {
TestRenyiDivAgree(1024, dim, 10, alpha);
}

for (float alpha = -2; alpha <= 2; alpha += 0.5)
for (float beta = -2; beta <= 2; beta += 0.5)
{
TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta);
}

for (double alpha = -2; alpha <= 2; alpha += 0.5)
for (double beta = -2; beta <= 2; beta += 0.5)
{
TestAlphaBetaDivAgree(1024, dim, 10, alpha, beta);
}
}

nTest++;
Expand Down

0 comments on commit 759b828

Please sign in to comment.