Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Issue #280. Another attempt to fix Travis compilation error.
  • Loading branch information
searchivairus committed Feb 7, 2018
1 parent 959381c commit e53183c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 4 deletions.
1 change: 1 addition & 0 deletions similarity_search/include/distcomp.h
Expand Up @@ -256,6 +256,7 @@ const uint_fast32_t SIFT_DIM = 128;
// All SIFT vectors are expected to have the same dimensionality (SIFT_DIM)
// Reference implementation: a plain loop over all SIFT_DIM components.
DistTypeSIFT l2SqrSIFTNaive(const uint8_t* pVect1, const uint8_t* pVect2);
// "Precomp" variants additionally read one DistTypeSIFT value stored right
// after the SIFT_DIM bytes of a vector, so the buffers passed in must be at
// least SIFT_DIM + sizeof(DistTypeSIFT) bytes long.
// NOTE(review): that trailing value is presumably the precomputed squared
// norm of the vector -- confirm against the code that builds the vectors.
DistTypeSIFT l2SqrSIFTPrecomp(const uint8_t* pVect1, const uint8_t* pVect2);
// SSE2 variant of l2SqrSIFTPrecomp; it calls l2SqrSIFTPrecomp instead when
// PORTABLE_SSE2 is not defined at compile time.
DistTypeSIFT l2SqrSIFTPrecompSSE2(const uint8_t* pVect1, const uint8_t* pVect2);
// AVX variant of l2SqrSIFTPrecomp (same signature and buffer layout).
DistTypeSIFT l2SqrSIFTPrecompAVX(const uint8_t* pVect1, const uint8_t* pVect2);

}
Expand Down
4 changes: 4 additions & 0 deletions similarity_search/include/portable_intrinsics.h
Expand Up @@ -32,6 +32,10 @@
#define PORTABLE_AVX
#endif

#if defined(__AVX2__)
#define PORTABLE_AVX2
#endif


#if defined(PORTABLE_SSE2)
#include <portable_simd.h>
Expand Down
61 changes: 57 additions & 4 deletions similarity_search/src/distcomp_l2sqr_sift.cc
Expand Up @@ -31,7 +31,7 @@ DistTypeSIFT l2SqrSIFTNaive(const uint8_t* pVect1,
const uint8_t* pVect2) {
DistTypeSIFT res = 0;
for (uint_fast32_t i = 0; i < SIFT_DIM; ++i) {
DistTypeSIFT d = DistTypeSIFT(pVect1[i]) - DistTypeSIFT(pVect1[i]);
DistTypeSIFT d = DistTypeSIFT(pVect1[i]) - DistTypeSIFT(pVect2[i]);
res += d*d;
}

Expand All @@ -49,11 +49,64 @@ DistTypeSIFT l2SqrSIFTPrecomp(const uint8_t* pVect1,
*reinterpret_cast<const DistTypeSIFT*>(pVect2 + SIFT_DIM) - 2 * sumProd;
}

/*
 * SSE2 implementation of the "precomputed" squared-L2 SIFT distance.
 *
 * Both pVect1 and pVect2 must point to SIFT_DIM bytes of descriptor data
 * immediately followed by one DistTypeSIFT value. The result is
 *
 *   trailing(pVect1) + trailing(pVect2) - 2 * <pVect1, pVect2>
 *
 * where <.,.> is the dot product of the SIFT_DIM uint8_t components.
 * NOTE(review): the trailing value is presumably the precomputed squared
 * norm of the vector, which makes the result the squared L2 distance --
 * confirm against the code that builds the vectors.
 *
 * When PORTABLE_SSE2 is not defined, the call is forwarded to the scalar
 * l2SqrSIFTPrecomp.
 */
DistTypeSIFT l2SqrSIFTPrecompSSE2(const uint8_t* pVect1,
                                  const uint8_t* pVect2) {
#ifndef PORTABLE_SSE2
#pragma message WARN("l2SqrSIFTPrecompSSE2: SSE2 is not available")
  return l2SqrSIFTPrecomp(pVect1, pVect2);
#else
  const unsigned dim = SIFT_DIM;

  DistTypeSIFT sumProd = 0;

  // Number of leading components that can be processed 16 bytes at a time.
  const size_t sse_offset = (dim / 16) * 16;

  const __m128i* pStart1 = reinterpret_cast<const __m128i*>(pVect1);
  const __m128i* pStart2 = reinterpret_cast<const __m128i*>(pVect2);
  const __m128i* pEnd1   = reinterpret_cast<const __m128i*>(pVect1 + sse_offset);

  // Explicit zero register: avoids reading an uninitialized variable.
  const __m128i zero = _mm_setzero_si128();
  __m128i sum = zero;

  PORTABLE_ALIGN32 int32_t unpack[4];

  while (pStart1 < pEnd1) {
    // The input is not required to be 16-byte aligned, hence loadu.
    const __m128i x = _mm_loadu_si128(pStart1++);
    const __m128i y = _mm_loadu_si128(pStart2++);

    // Widen the upper 8 bytes to 16-bit lanes, multiply, and add adjacent
    // products into four 32-bit accumulators.
    __m128i x1 = _mm_unpackhi_epi8(x, zero);
    __m128i y1 = _mm_unpackhi_epi8(y, zero);
    sum = _mm_add_epi32(sum, _mm_madd_epi16(x1, y1));

    // Same for the lower 8 bytes.
    x1 = _mm_unpacklo_epi8(x, zero);
    y1 = _mm_unpacklo_epi8(y, zero);
    sum = _mm_add_epi32(sum, _mm_madd_epi16(x1, y1));
  }

  // Horizontal sum of the four 32-bit partial sums.
  _mm_store_si128(reinterpret_cast<__m128i*>(unpack), sum);
  sumProd += unpack[0] + unpack[1] + unpack[2] + unpack[3];

  // Scalar tail for components that do not fill a whole 16-byte chunk.
  // The loop bound alone decides whether there is anything to do (the
  // former `dim & 16` guard tested a single bit, not the remainder).
  for (uint_fast32_t i = sse_offset; i < dim; ++i) {
    sumProd += DistTypeSIFT(pVect1[i]) * DistTypeSIFT(pVect2[i]);
  }

  return
    *reinterpret_cast<const DistTypeSIFT*>(pVect1 + dim) +
    *reinterpret_cast<const DistTypeSIFT*>(pVect2 + dim) - 2 * sumProd;
#endif
}

DistTypeSIFT l2SqrSIFTPrecompAVX(const uint8_t* pVect1,
const uint8_t* pVect2) {
#ifndef PORTABLE_AVX
#pragma message WARN("l2_sqrt_sift_precomp_avx: AVX is not available, defaulting to pure C++ implementation!")
return l2SqrSIFTPrecomp(pVect1, pVect2);
#ifndef PORTABLE_AVX2
#pragma message WARN("l2SqrSIFTPrecompAVX: AVX2 is not available")
#ifndef PORTABLE_SSE4
#pragma message WARN("l2SqrSIFTPrecompAVX: SSE4 is not available")
return l2SqrSIFTPrecomp(pVect1, pVect2);
#else
return l2SqrSIFTPrecompSSE2(pVect1, pVect2);
#endif
#else
const unsigned dim = SIFT_DIM;

Expand Down

0 comments on commit e53183c

Please sign in to comment.