Skip to content

Commit

Permalink
use 32 bit for bit_jaccard for now b/c 64 bit causes mysterious prob…
Browse files Browse the repository at this point in the history
…lems
  • Loading branch information
Greg Friedland committed Feb 17, 2019
1 parent 1f3b16d commit ea72c88
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 12 deletions.
1 change: 1 addition & 0 deletions python_bindings/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
if os.path.exists(library_file):
# if we have a prebuilt nmslib library file, use that.
extra_objects.append(library_file)
print("Found: " + os.path.abspath(library_file))
else:
raise RuntimeError("can't find prebuild lib: " + os.path.abspath(library_file))
# # Otherwise build all the files here directly (excluding extras which need eigen/boost)
Expand Down
34 changes: 25 additions & 9 deletions python_bindings/tests/bindings_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy.testing as npt

import nmslib
import psutil


def get_exact_cosine(row, data, N=10):
Expand Down Expand Up @@ -92,21 +93,30 @@ def _get_index(self, space='bit_jaccard'):
raise NotImplementedError()

def testKnnQuery(self):
np.random.seed(23)
nbits = 128
nbits = 2048
chunk_size = 1000

ps_proc = psutil.Process()
print(f"\n{ps_proc.memory_info()}")
index = self._get_index()

for i in range(100):
a = np.random.rand(nbits) > 0.5
s = " ".join(["1" if e else "0" for e in a])
index.addDataPoint(id=i, data=s)
np.random.seed(23)
for i in range(0, 10000, chunk_size):
strs = []
for j in range(chunk_size):
a = np.random.rand(nbits) > 0.5
s = " ".join(["1" if e else "0" for e in a])
strs.append(s)
index.addDataPointBatch(ids=np.arange(i, i + chunk_size), data=strs)

print(f"\n{ps_proc.memory_info()}")
index.createIndex()
print(f"\n{ps_proc.memory_info()}")

a = np.ones(nbits)
s = " ".join(["1" if e else "0" for e in a])
ids, distances = index.knnQuery(s, k=10)
print(ids)
# print(ids)
print(distances)
# self.assertTrue(get_hitrate(get_exact_cosine(row, data), ids) >= 5)

Expand Down Expand Up @@ -170,8 +180,14 @@ def _get_index(self, space='cosinesimil'):

class BitJaccardTestCase(unittest.TestCase, BitVectorIndexTestMixin):
def _get_index(self, space='bit_jaccard'):
return nmslib.init(method='hnsw', space='bit_jaccard', data_type=nmslib.DataType.OBJECT_AS_STRING,
dtype=nmslib.DistType.DOUBLE)
return nmslib.init(method='hnsw', space=space, data_type=nmslib.DataType.OBJECT_AS_STRING,
dtype=nmslib.DistType.FLOAT)


class SparseJaccardTestCase(unittest.TestCase, BitVectorIndexTestMixin):
def _get_index(self, space='jaccard_sparse'):
return nmslib.init(method='hnsw', space=space, data_type=nmslib.DataType.OBJECT_AS_STRING,
dtype=nmslib.DistType.FLOAT)


# class BitHammingTestCase(unittest.TestCase, BitVectorIndexTestMixin):
Expand Down
4 changes: 2 additions & 2 deletions similarity_search/include/factory/init_spaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ inline void initSpaces() {
// Registering binary/bit Hamming/Jaccard
SpaceFactoryRegistry<int>::CreateFuncPtr bit_hamming_func_ptr = CreateBitHamming<int,uint32_t>;
REGISTER_SPACE_CREATOR(int, SPACE_BIT_HAMMING, bit_hamming_func_ptr )
SpaceFactoryRegistry<double>::CreateFuncPtr bit_jaccard_func_ptr = CreateBitJaccard<double,uint64_t>;
REGISTER_SPACE_CREATOR(double, SPACE_BIT_JACCARD, bit_jaccard_func_ptr )
SpaceFactoryRegistry<float>::CreateFuncPtr bit_jaccard_func_ptr = CreateBitJaccard<float,uint32_t>;
REGISTER_SPACE_CREATOR(float, SPACE_BIT_JACCARD, bit_jaccard_func_ptr )

// Registering the Levenshtein-distance: regular and normalized
REGISTER_SPACE_CREATOR(int, SPACE_LEVENSHTEIN, CreateLevenshtein)
Expand Down
2 changes: 1 addition & 1 deletion similarity_search/test/test_space_serial.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ TEST(Test_BitJaccard) {
testVect.push_back(ss.str());
}
for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) {
EXPECT_EQ(true, fullTest<double>(testVect, maxNumRec, "tmp_out_file.txt", "bit_jaccard", emptyParams, false));
EXPECT_EQ(true, fullTest<float>(testVect, maxNumRec, "tmp_out_file.txt", "bit_jaccard", emptyParams, false));
}
}

Expand Down

0 comments on commit ea72c88

Please sign in to comment.