diff --git a/python_bindings/setup.py b/python_bindings/setup.py index 0808994..c01800e 100755 --- a/python_bindings/setup.py +++ b/python_bindings/setup.py @@ -21,6 +21,7 @@ if os.path.exists(library_file): # if we have a prebuilt nmslib library file, use that. extra_objects.append(library_file) + print("Found: " + os.path.abspath(library_file)) else: raise RuntimeError("can't find prebuild lib: " + os.path.abspath(library_file)) # # Otherwise build all the files here directly (excluding extras which need eigen/boost) diff --git a/python_bindings/tests/bindings_test.py b/python_bindings/tests/bindings_test.py index f134738..a42939e 100644 --- a/python_bindings/tests/bindings_test.py +++ b/python_bindings/tests/bindings_test.py @@ -6,6 +6,7 @@ import numpy.testing as npt import nmslib +import psutil def get_exact_cosine(row, data, N=10): @@ -92,21 +93,30 @@ def _get_index(self, space='bit_jaccard'): raise NotImplementedError() def testKnnQuery(self): - np.random.seed(23) - nbits = 128 + nbits = 2048 + chunk_size = 1000 + ps_proc = psutil.Process() + print(f"\n{ps_proc.memory_info()}") index = self._get_index() - for i in range(100): - a = np.random.rand(nbits) > 0.5 - s = " ".join(["1" if e else "0" for e in a]) - index.addDataPoint(id=i, data=s) + np.random.seed(23) + for i in range(0, 10000, chunk_size): + strs = [] + for j in range(chunk_size): + a = np.random.rand(nbits) > 0.5 + s = " ".join(["1" if e else "0" for e in a]) + strs.append(s) + index.addDataPointBatch(ids=np.arange(i, i + chunk_size), data=strs) + + print(f"\n{ps_proc.memory_info()}") index.createIndex() + print(f"\n{ps_proc.memory_info()}") a = np.ones(nbits) s = " ".join(["1" if e else "0" for e in a]) ids, distances = index.knnQuery(s, k=10) - print(ids) + # print(ids) print(distances) # self.assertTrue(get_hitrate(get_exact_cosine(row, data), ids) >= 5) @@ -170,8 +180,14 @@ def _get_index(self, space='cosinesimil'): class BitJaccardTestCase(unittest.TestCase, BitVectorIndexTestMixin): def _get_index(self, space='bit_jaccard'): - return nmslib.init(method='hnsw', space='bit_jaccard', data_type=nmslib.DataType.OBJECT_AS_STRING, - dtype=nmslib.DistType.DOUBLE) + return nmslib.init(method='hnsw', space=space, data_type=nmslib.DataType.OBJECT_AS_STRING, + dtype=nmslib.DistType.FLOAT) + + +class SparseJaccardTestCase(unittest.TestCase, BitVectorIndexTestMixin): + def _get_index(self, space='jaccard_sparse'): + return nmslib.init(method='hnsw', space=space, data_type=nmslib.DataType.OBJECT_AS_STRING, + dtype=nmslib.DistType.FLOAT) # class BitHammingTestCase(unittest.TestCase, BitVectorIndexTestMixin): diff --git a/similarity_search/include/factory/init_spaces.h b/similarity_search/include/factory/init_spaces.h index dd0aae7..8abee56 100644 --- a/similarity_search/include/factory/init_spaces.h +++ b/similarity_search/include/factory/init_spaces.h @@ -46,8 +46,8 @@ inline void initSpaces() { // Registering binary/bit Hamming/Jaccard SpaceFactoryRegistry::CreateFuncPtr bit_hamming_func_ptr = CreateBitHamming; REGISTER_SPACE_CREATOR(int, SPACE_BIT_HAMMING, bit_hamming_func_ptr ) - SpaceFactoryRegistry::CreateFuncPtr bit_jaccard_func_ptr = CreateBitJaccard; - REGISTER_SPACE_CREATOR(double, SPACE_BIT_JACCARD, bit_jaccard_func_ptr ) + SpaceFactoryRegistry::CreateFuncPtr bit_jaccard_func_ptr = CreateBitJaccard; + REGISTER_SPACE_CREATOR(float, SPACE_BIT_JACCARD, bit_jaccard_func_ptr ) // Registering the Levensthein-distance: regular and normalized REGISTER_SPACE_CREATOR(int, SPACE_LEVENSHTEIN, CreateLevenshtein) diff --git a/similarity_search/test/test_space_serial.cc b/similarity_search/test/test_space_serial.cc index e8db105..5dd36b7 100644 --- a/similarity_search/test/test_space_serial.cc +++ b/similarity_search/test/test_space_serial.cc @@ -231,7 +231,7 @@ TEST(Test_BitJaccard) { testVect.push_back(ss.str()); } for (size_t maxNumRec = 1; maxNumRec < MAX_NUM_REC; ++maxNumRec) { - EXPECT_EQ(true, fullTest(testVect, maxNumRec, "tmp_out_file.txt", "bit_jaccard", emptyParams, false)); + EXPECT_EQ(true, fullTest(testVect, maxNumRec, "tmp_out_file.txt", "bit_jaccard", emptyParams, false)); } }