Skip to content

Commit

Permalink
Intermediate commit for SIFT space.
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivairus committed Feb 7, 2018
1 parent bff8bc0 commit 7281031
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 13 deletions.
10 changes: 10 additions & 0 deletions scripts/update_all_files_header.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Runs scripts/update_file_header.py on every C/C++ source and header file
# found under the current directory.
#
# The rewritten text of each file goes to a single scratch file; this script
# does not copy it back over the original.
tmpn=$(mktemp)
# Do not leave the scratch file behind, whichever way the script ends.
trap 'rm -f "$tmpn"' EXIT

for suff in h cc cpp c ; do
  # The pattern is quoted so that find receives it verbatim: unquoted, the
  # shell expands it against the current directory first.
  for f in $(find . -name "*.$suff") ; do
    echo "$f"
    scripts/update_file_header.py "$f" "$tmpn"
  done
done

60 changes: 60 additions & 0 deletions scripts/update_file_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""Replace the template license header at the top of a source file.

Usage: update_file_header.py <input file> <output file>

The input is expected to start with a '/**' line followed by a
'* Non-metric Space Library' line (surrounding whitespace is ignored).
Everything up to and including the first line that consists of '*/' is
replaced with the current header; the remaining lines are copied verbatim.

The output file is written only after the input has been validated.

Exit status:
  0  the header was replaced, or the input has no template header (the file
     is skipped with a warning),
  1  wrong number of arguments, or the template header is never closed.
"""
import sys
import os
import datetime

now = datetime.datetime.now()

mainDevelList = 'Bilegsaikhan Naidan, Leonid Boytsov, Yury Malkov, Ben Frederickson, David Novak'

# Placeholders: %s is the developer list, %d is the last copyright year.
newHeader = """/**
* Non-metric Space Library
*
* Main developers: %s
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2013-%d
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/"""


def printErr(str):
    """Write a message followed by a newline to standard error."""
    sys.stderr.write(str + '\n')


def hasTemplateHeader(lines):
    """Return True if the first two lines open the template header.

    Inputs with fewer than two lines cannot contain the header, so they are
    reported as False instead of raising IndexError.
    """
    return (len(lines) >= 2 and
            lines[0].strip() == '/**' and
            lines[1].strip() == '* Non-metric Space Library')


def findHeaderEnd(lines):
    """Return the index of the first line that is exactly '*/', or None."""
    for ln, line in enumerate(lines):
        if line.strip() == '*/':
            return ln
    return None


def main(argv):
    """Run the script with the given argument vector and return the exit status."""
    if len(argv) != 3:
        printErr("PARAMETERS: <input file> <output file>")
        return 1

    inFileName = argv[1]
    outFileName = argv[2]

    with open(inFileName, 'r') as inFile:
        lines = inFile.readlines()

    if not hasTemplateHeader(lines):
        print('WARNING Cannot find a template header in the file %s, IGNORING' % inFileName)
        return 0

    headEnd = findHeaderEnd(lines)
    if headEnd is None:
        printErr('Cannot find the end of the template header in the file %s' % inFileName)
        return 1

    with open(outFileName, 'w') as outFile:
        outFile.write(newHeader % (mainDevelList, now.year))
        # The template has no trailing newline: terminate the closing '*/' line
        # explicitly so that it is not glued to the first line after the header.
        outFile.write('\n')
        outFile.writelines(lines[headEnd + 1:])

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
10 changes: 10 additions & 0 deletions similarity_search/include/distcomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,5 +256,15 @@ inline float JaccardSparse(const IdType *pArr1, size_t qty1, const IdType *pArr2
*/
#include "distcomp_edist.h"

// Distance type for SIFT vectors: with SIFT_DIM (128) one-byte components the
// squared L2 distance is at most 128 * 255 * 255, which fits into an int.
using DistTypeSIFT = int;

// Number of components in every SIFT vector.
const uint_fast32_t SIFT_DIM = 128;

// Squared L2 distance between two SIFT vectors. Both pointers must refer to
// vectors with exactly SIFT_DIM components.
DistTypeSIFT l2SqrSIFTNaive(const uint8_t* pVect1, const uint8_t* pVect2);
// The "Precomp" variants additionally read one DistTypeSIFT value stored right
// after the SIFT_DIM components of each vector (presumably the precomputed sum
// of squared components -- confirm against the code that creates the objects).
DistTypeSIFT l2SqrSIFTPrecomp(const uint8_t* pVect1, const uint8_t* pVect2);
DistTypeSIFT l2SqrSIFTPrecompAVX(const uint8_t* pVect1, const uint8_t* pVect2);


#endif
11 changes: 4 additions & 7 deletions similarity_search/include/portable_align.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@
*/

// Alignment specifiers for 16- and 32-byte aligned variables: the GNU attribute
// syntax when __GNUC__ is defined, the __declspec syntax otherwise.
#if defined(__GNUC__)
#define PORTABLE_ALIGN16 __attribute__((aligned(16)))
#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
#else
#define PORTABLE_ALIGN16 __declspec(align(16))
#define PORTABLE_ALIGN32 __declspec(align(32))
#endif
4 changes: 4 additions & 0 deletions similarity_search/include/portable_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
#define PORTABLE_SSE4
#endif

// PORTABLE_AVX is defined whenever the compiler defines __AVX__.
#ifdef __AVX__
#define PORTABLE_AVX
#endif


#if defined(PORTABLE_SSE4)
#include <portable_simd.h>
Expand Down
78 changes: 78 additions & 0 deletions similarity_search/include/space/space_sift_vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
* Non-metric Space Library
*
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
* With contributions from Lawrence Cayton (http://lcayton.com/) and others.
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2018
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/

#ifndef _SPACE_SIFT_VECTOR_H_
#define _SPACE_SIFT_VECTOR_H_

#include <string>
#include <map>
#include <stdexcept>
#include <sstream>
#include <memory>

#include <string.h>
#include "global.h"
#include "object.h"
#include "utils.h"
#include "space.h"
#include "distcomp.h"

namespace similarity {

using std::string;
using std::unique_ptr;

/**
 * A space whose objects are SIFT vectors: SIFT_DIM components of type uint8_t.
 * The distance is whatever l2SqrSIFTPrecompAVX returns for the raw object data.
 */
class SiftVectorSpace : public Space<DistTypeSIFT> {
 public:
  explicit SiftVectorSpace() {}
  virtual ~SiftVectorSpace() {}

  /** Standard functions to read/write/create objects */
  virtual unique_ptr<Object> CreateObjFromStr(IdType id, LabelType label, const string& s,
                                              DataFileInputState* pInpState) const;
  // Create a string representation of an object.
  virtual string CreateStrFromObj(const Object* pObj, const string& externId /* ignored */) const;
  // Open a file for reading, fetch a header (if there is any) and memorize an input state
  virtual unique_ptr<DataFileInputState> OpenReadFileHeader(const string& inputFile) const;
  // Open a file for writing, write a header (if there is any) and memorize an output state
  virtual unique_ptr<DataFileOutputState> OpenWriteFileHeader(const ObjectVector& dataset,
                                                              const string& outputFile) const;
  /*
   * Read a string representation of the next object in a file as well
   * as its label. Return false, on EOF.
   */
  virtual bool ReadNextObjStr(DataFileInputState &, string& strObj, LabelType& label, string& externId) const;
  /** End of standard functions to read/write/create objects */

  // Create an object from a vector of uint8_t components.
  virtual Object* CreateObjFromUint8Vect(IdType id, LabelType label, const std::vector<uint8_t>& InpVect) const;
  // Every object has the same number of elements, so the argument is not inspected.
  virtual size_t GetElemQty(const Object* object) const { return SIFT_DIM; }

  // Parse a line of text into a label and a vector of uint8_t components.
  static void ReadUint8Vec(std::string line, LabelType& label, std::vector<uint8_t>& v);

 protected:
  // The macro has to be given the name of the enclosing class.
  DISABLE_COPY_AND_ASSIGN(SiftVectorSpace);

  // The object data is handed to l2SqrSIFTPrecompAVX as is; that function reads
  // SIFT_DIM bytes followed by one DistTypeSIFT value from each argument.
  virtual DistTypeSIFT HiddenDistance(const Object* obj1, const Object* obj2) const override {
    const uint8_t* pVect1 = reinterpret_cast<const uint8_t*>(obj1->data());
    const uint8_t* pVect2 = reinterpret_cast<const uint8_t*>(obj2->data());

    return l2SqrSIFTPrecompAVX(pVect1, pVect2);
  }
};

} // namespace similarity

#endif
10 changes: 4 additions & 6 deletions similarity_search/include/utils.h
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
/**
* Non-metric Space Library
*
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
* With contributions from Lawrence Cayton (http://lcayton.com/) and others.
* Main developers: Bilegsaikhan Naidan, Leonid Boytsov, Yury Malkov, Ben Frederickson, David Novak
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2014
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2013-2018
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/

#ifndef _UTILS_H_
#define _UTILS_H_

Expand Down
101 changes: 101 additions & 0 deletions similarity_search/src/distcomp_sift_l2sqr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/**
* Non-metric Space Library
*
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
* With contributions from Lawrence Cayton (http://lcayton.com/) and others.
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2014
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/
#include "distcomp.h"
#include "logging.h"
#include "utils.h"
#include "pow.h"
#include "portable_intrinsics.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <limits>

namespace similarity {

using namespace std;

/**
 * Squared L2 distance between two SIFT vectors, computed component by component.
 *
 * @param pVect1 first vector, SIFT_DIM components
 * @param pVect2 second vector, SIFT_DIM components
 * @return the sum of squared component differences
 */
DistTypeSIFT l2SqrSIFTNaive(const uint8_t* pVect1,
                            const uint8_t* pVect2) {
  DistTypeSIFT res = 0;
  for (uint_fast32_t i = 0; i < SIFT_DIM; ++i) {
    // Convert to the signed distance type before subtracting, so that the
    // difference can be negative.
    const DistTypeSIFT d = DistTypeSIFT(pVect1[i]) - DistTypeSIFT(pVect2[i]);
    res += d * d;
  }

  return res;
}

/**
 * Distance between two SIFT vectors computed from their dot product and one
 * extra value stored after each vector.
 *
 * Each argument must point to SIFT_DIM components immediately followed by
 * sizeof(DistTypeSIFT) bytes holding a DistTypeSIFT value (presumably the
 * precomputed sum of squared components, which makes the result the squared
 * L2 distance -- confirm against the code that creates the objects).
 *
 * @return extra1 + extra2 - 2 * dot(pVect1, pVect2)
 */
DistTypeSIFT l2SqrSIFTPrecomp(const uint8_t* pVect1,
                              const uint8_t* pVect2) {
  DistTypeSIFT sumProd = 0;
  for (uint_fast32_t i = 0; i < SIFT_DIM; ++i) {
    sumProd += DistTypeSIFT(pVect1[i]) * DistTypeSIFT(pVect2[i]);
  }

  // The trailing values are copied out with memcpy: nothing guarantees that
  // pVect + SIFT_DIM is suitably aligned for a direct DistTypeSIFT load.
  DistTypeSIFT extra1;
  DistTypeSIFT extra2;
  std::memcpy(&extra1, pVect1 + SIFT_DIM, sizeof(extra1));
  std::memcpy(&extra2, pVect2 + SIFT_DIM, sizeof(extra2));

  return extra1 + extra2 - 2 * sumProd;
}

/**
 * SIMD version of l2SqrSIFTPrecomp with the same input layout and result:
 * SIFT_DIM components followed by one DistTypeSIFT value per vector.
 *
 * The vectorized path uses 256-bit integer intrinsics (_mm256_unpack*_epi8,
 * _mm256_madd_epi16, _mm256_add_epi32), which belong to AVX2. It is therefore
 * compiled only when the compiler also defines __AVX2__; otherwise the call is
 * forwarded to l2SqrSIFTPrecomp.
 */
DistTypeSIFT l2SqrSIFTPrecompAVX(const uint8_t* pVect1,
                                 const uint8_t* pVect2) {
#if !defined(PORTABLE_AVX) || !defined(__AVX2__)
#pragma message WARN("l2_sqrt_sift_precomp_avx: AVX2 is not available, defaulting to pure C++ implementation!")
  return l2SqrSIFTPrecomp(pVect1, pVect2);
#else
  const unsigned dim = SIFT_DIM;

  // Number of leading components that are processed 32 at a time.
  const size_t sse_offset = (dim / 32) * 32;

  const __m256i* pStart1 = reinterpret_cast<const __m256i*>(pVect1);
  const __m256i* pStart2 = reinterpret_cast<const __m256i*>(pVect2);
  const __m256i* pEnd1   = reinterpret_cast<const __m256i*>(pVect1 + sse_offset);

  const __m256i zero = _mm256_setzero_si256();
  __m256i sum = zero;

  while (pStart1 < pEnd1) {
    const __m256i x = _mm256_loadu_si256(pStart1++);
    const __m256i y = _mm256_loadu_si256(pStart2++);
    // Interleaving with zero widens the bytes to 16-bit values. Both inputs
    // are rearranged in the same way, so matching components stay paired.
    const __m256i xHi = _mm256_unpackhi_epi8(x, zero);
    const __m256i yHi = _mm256_unpackhi_epi8(y, zero);
    sum = _mm256_add_epi32(sum, _mm256_madd_epi16(xHi, yHi));
    const __m256i xLo = _mm256_unpacklo_epi8(x, zero);
    const __m256i yLo = _mm256_unpacklo_epi8(y, zero);
    sum = _mm256_add_epi32(sum, _mm256_madd_epi16(xLo, yLo));
  }

  // Horizontal sum of the eight 32-bit partial sums.
  int32_t PORTABLE_ALIGN32 unpack[8];
  _mm256_store_si256(reinterpret_cast<__m256i*>(unpack), sum);
  DistTypeSIFT sumProd = unpack[0] + unpack[1] + unpack[2] + unpack[3] +
                         unpack[4] + unpack[5] + unpack[6] + unpack[7];

  // Components that do not fill a whole 32-byte block (none when dim is a
  // multiple of 32, in which case the loop body never runs).
  for (uint_fast32_t i = sse_offset; i < dim; ++i) {
    sumProd += DistTypeSIFT(pVect1[i]) * DistTypeSIFT(pVect2[i]);
  }

  // The trailing values are copied out with memcpy: nothing guarantees that
  // pVect + dim is suitably aligned for a direct DistTypeSIFT load.
  DistTypeSIFT extra1;
  DistTypeSIFT extra2;
  std::memcpy(&extra1, pVect1 + dim, sizeof(extra1));
  std::memcpy(&extra2, pVect2 + dim, sizeof(extra2));

  return extra1 + extra2 - 2 * sumProd;
#endif
}

}
Loading

0 comments on commit 7281031

Please sign in to comment.