Removing unnecessary test
searchivairus committed Aug 4, 2017
1 parent aedc7f2 commit d327e79
Showing 1 changed file with 1 addition and 95 deletions.
python_bindings/integration_tests/test_nmslib.py (96 changes: 1 addition & 95 deletions)
@@ -227,7 +227,7 @@ def gen_sparse_data():
     np.savetxt('sample_sparse_queryset.txt', query, delimiter='\t')

 def test_sparse_vector_fresh():
-    space_type = 'cosinesimil_sparse'
+    space_type = 'cosinesimil_sparse_fast'
     space_param = []
     method_name = 'small_world_rand'
     index_name = method_name + '_sparse.index'
@@ -443,99 +443,6 @@ def test_object_as_string_fresh(batch=True):

     nmslib.freeIndex(index)

-
-def bench_sparse_vector(batch=True):
-    # delay importing these so CI can import module
-    from scipy.sparse import csr_matrix
-    from scipy.spatial import distance
-
-    dim = 20000
-    dataset = np.random.binomial(1, 0.01, size=(40000, dim))
-    queryset = np.random.binomial(1, 0.009, size=(1000, dim))
-
-    print('dataset[0]:', [[i, v] for i, v in enumerate(dataset[0]) if v > 0])
-
-    k = 3
-
-    q0 = queryset[0]
-    res = []
-    for i in range(dataset.shape[0]):
-        res.append([i, distance.cosine(q0, dataset[i,:])])
-    res.sort(key=lambda x: x[1])
-    print('q0 res', res[:k])
-
-    data_matrix = csr_matrix(dataset, dtype=np.float32)
-    query_matrix = csr_matrix(queryset, dtype=np.float32)
-
-    data_to_return = range(dataset.shape[0])
-
-    #space_type = 'cosinesimil_sparse'
-    space_type = 'cosinesimil_sparse_fast'
-    space_param = []
-    method_name = 'small_world_rand'
-    index_name = method_name + '_sparse.index'
-    if os.path.isfile(index_name):
-        os.remove(index_name)
-    index = nmslib.init(space_type,
-                        space_param,
-                        method_name,
-                        nmslib.DataType.SPARSE_VECTOR,
-                        nmslib.DistType.FLOAT)
-
-    if batch:
-        with TimeIt('batch add'):
-            positions = nmslib.addDataPointBatch(index, np.arange(len(dataset), dtype=np.int32), data_matrix)
-        print('positions', positions)
-    else:
-        d = []
-        q = []
-        with TimeIt('preparing'):
-            for data in dataset:
-                d.append([[i, v] for i, v in enumerate(data) if v > 0])
-            for data in queryset:
-                q.append([[i, v] for i, v in enumerate(data) if v > 0])
-        with TimeIt('adding points'):
-            for id, data in enumerate(d):
-                nmslib.addDataPoint(index, id, data)
-
-    print('Let\'s invoke the index-build process')
-
-    index_param = ['NN=17', 'efConstruction=50', 'indexThreadQty=4']
-    query_time_param = ['efSearch=50']
-
-    with TimeIt('building index'):
-        nmslib.createIndex(index, index_param)
-
-    print('The index is created')
-
-    nmslib.setQueryTimeParams(index,query_time_param)
-
-    print('Query time parameters are set')
-
-    print("Results for the freshly created index:")
-
-    with TimeIt('knn query'):
-        if batch:
-            num_threads = 10
-            res = nmslib.knnQueryBatch(index, num_threads, k, query_matrix)
-            for idx, v in enumerate(res):
-                if idx < 5:
-                    print(idx, v)
-                if idx == 0:
-                    for i in v:
-                        print('q0', i, distance.cosine(q0, dataset[i,:]))
-        else:
-            for idx, data in enumerate(q):
-                res = nmslib.knnQuery(index, k, data)
-                if idx < 5:
-                    print(idx, res)
-
-    nmslib.saveIndex(index, index_name)
-
-    print("The index %s is saved" % index_name)
-
-    nmslib.freeIndex(index)
-
 if __name__ == '__main__':

     print('DENSE_VECTOR', nmslib.DataType.DENSE_VECTOR)
@@ -562,5 +469,4 @@ def bench_sparse_vector(batch=True):
     test_object_as_string_fresh()
     test_object_as_string_fresh(False)

-    bench_sparse_vector()
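
For reference, the deleted bench_sparse_vector exercised the fast sparse cosine space end to end. The sketch below condenses that flow using only the calls visible in the removed code above (nmslib.init, addDataPointBatch, createIndex, setQueryTimeParams, knnQueryBatch, freeIndex); it assumes the 2017-era integration-test bindings rather than the current nmslib Python API, and the helper name sparse_knn_sketch is made up for illustration.

# Condensed sketch of the removed benchmark's flow; assumes the old
# (2017) nmslib bindings used elsewhere in this test file.
import numpy as np
from scipy.sparse import csr_matrix

import nmslib

def sparse_knn_sketch(k=3, dim=20000):
    # Random sparse binary data, as in the removed benchmark (smaller sizes here).
    dataset = np.random.binomial(1, 0.01, size=(1000, dim))
    queryset = np.random.binomial(1, 0.009, size=(10, dim))
    data_matrix = csr_matrix(dataset, dtype=np.float32)
    query_matrix = csr_matrix(queryset, dtype=np.float32)

    # Same space and method as the removed code.
    index = nmslib.init('cosinesimil_sparse_fast', [], 'small_world_rand',
                        nmslib.DataType.SPARSE_VECTOR, nmslib.DistType.FLOAT)
    nmslib.addDataPointBatch(index, np.arange(len(dataset), dtype=np.int32), data_matrix)
    nmslib.createIndex(index, ['NN=17', 'efConstruction=50', 'indexThreadQty=4'])
    nmslib.setQueryTimeParams(index, ['efSearch=50'])
    res = nmslib.knnQueryBatch(index, 10, k, query_matrix)  # 10 query threads
    nmslib.freeIndex(index)
    return res

The remaining sparse coverage lives in test_sparse_vector_fresh, which now uses the same 'cosinesimil_sparse_fast' space (see the first hunk above).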
