Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
add map4 fingerprint
  • Loading branch information
ChunjiangZhu committed Jun 19, 2020
1 parent c0da55d commit 0e81a7b
Show file tree
Hide file tree
Showing 7 changed files with 348 additions and 59 deletions.
Binary file modified .DS_Store
Binary file not shown.
126 changes: 123 additions & 3 deletions algos.yaml
Expand Up @@ -26,7 +26,7 @@ bit:
singularity-tag: ann-bench-nmslib3
module: ann_benchmarks.algorithms.nmslib
constructor: NmslibReuseIndex
base-args: ["@metric", "vptree"]
base-args: ["@metric", "Byte", "vptree"]
run-groups:
base:
# When @args is a dictionary, algorithm instances will be generated
Expand All @@ -51,7 +51,7 @@ bit:
singularity-tag: ann-bench-nmslib3
module: ann_benchmarks.algorithms.nmslib
constructor: NmslibReuseIndex
base-args: ["@metric", "hnsw"]
base-args: ["@metric", "Byte", "hnsw"]
run-groups:
M-48:
arg-groups:
Expand Down Expand Up @@ -90,7 +90,7 @@ bit:
singularity-tag: ann-bench-nmslib3
module: ann_benchmarks.algorithms.nmslib
constructor: NmslibReuseIndex
base-args: ["@metric", "sw-graph"]
base-args: ["@metric", "Byte", "sw-graph"]
run-groups:
NN-96:
arg-groups:
Expand Down Expand Up @@ -186,3 +186,123 @@ bit:
run-groups:
empty:
args: []
int:
jaccard:
Bruteforce:
disabled: false
docker-tag: ann-benchmarks-sklearn
singularity-tag: ann-bench-sklearn
module: ann_benchmarks.algorithms.bruteforce
constructor: BruteForceBLAS
base-args: ["@metric"]
run-groups:
base:
args: {}
Hnsw(Nmslib):
disabled: false
docker-tag: ann-benchmarks-nmslib
singularity-tag: ann-bench-nmslib3
module: ann_benchmarks.algorithms.nmslib
constructor: NmslibReuseIndex
base-args: ["@metric", "Int", "hnsw"]
run-groups:
M-48:
arg-groups:
- {"M": 48, "post": 2, "efConstruction": 800}
- False
query-args: [[50, 70, 90, 120, 160, 200, 400, 600, 700, 800, 1000,
1400, 1600, 2000]]
M-32:
arg-groups:
- {"M": 32, "post": 2, "efConstruction": 800}
- False
query-args: [[100, 300, 500, 700, 1000, 1500, 2000]]
M-20:
arg-groups:
- {"M": 20, "post": 0, "efConstruction": 800}
- False
query-args: [[2, 5, 10, 15, 20, 30, 40, 50, 70, 80]]
M-12:
arg-groups:
- {"M": 12, "post": 0, "efConstruction": 800}
- False
query-args: [[1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 80]]
M-5:
arg-groups:
- {"M": 5, "post": 0, "efConstruction": 10}
- False
query-args: [[1, 2, 5, 10]]
M-2:
arg-groups:
- {"M": 2, "post": 0, "efConstruction": 1}
- False
query-args: [[1, 2]]
SW-graph(Nmslib):
disabled: false
docker-tag: ann-benchmarks-nmslib
singularity-tag: ann-bench-nmslib3
module: ann_benchmarks.algorithms.nmslib
constructor: NmslibReuseIndex
base-args: ["@metric", "Int", "sw-graph"]
run-groups:
NN-96:
arg-groups:
- {"NN": 96}
- False
query-args: [[800, 400, 200, 100, 50, 30, 20, 15, 10, 5, 1]]
NN-48:
arg-groups:
- {"NN": 48}
- False
query-args: [[800, 400, 200, 100, 50, 30, 20, 15, 10, 5, 1]]
NN-24:
arg-groups:
- {"NN": 24}
- False
query-args: [[800, 400, 200, 100, 50, 30, 20, 15, 10, 5, 1]]
NN-16:
arg-groups:
- {"NN": 16}
- False
query-args: [[800, 400, 200, 100, 50, 30, 20, 15, 10, 5, 1]]
NN-10:
arg-groups:
- {"NN": 10}
- False
query-args: [[800, 400, 200, 100, 50, 30, 20, 15, 10, 5, 1]]
NN-5:
arg-groups:
- {"NN": 5}
- False
query-args: [[30, 25, 20, 15, 10, 5, 4, 3, 2, 1]]
NN-2:
arg-groups:
- {"NN": 2}
- False
query-args: [[30, 25, 20, 15, 10, 5, 4, 3, 2, 1]]
NN-1:
arg-groups:
- {"NN": 1}
- False
query-args: [[30, 25, 20, 15, 10, 5, 4, 3, 2, 1]]
Onng(Ngt):
disabled: false
docker-tag: ann-benchmarks-ngt
singularity-tag: ann-bench-ngt
module: ann_benchmarks.algorithms.onng_ngt
constructor: ONNG
base-args: ["@metric", "Byte", 1.0]
run-groups:
onng:
args: [[100, 300, 500, 1000], [10, 30, 50, 100], [10, 30, 50, 120]]
query-args: [[0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]]
Risc:
disabled: false
docker-tag: ann-benchmarks-risc
singularity-tag: ann-bench-risc
module: ann_benchmarks.algorithms.risc
constructor: Risc
base-args: ["@metric", "Risc"]
run-groups:
empty:
args: []
Binary file modified ann_benchmarks/.DS_Store
Binary file not shown.
32 changes: 23 additions & 9 deletions ann_benchmarks/algorithms/nmslib.py
Expand Up @@ -24,9 +24,17 @@ class NmslibReuseIndex(BaseANN):
arr.sort()
res.append(' '.join([str(k) for k in arr]))
return res

@staticmethod
def intMatrToStrArray(intMatr):
    """Render each row of an integer matrix as a space-separated string.

    Values keep their original order within a row; every element is
    converted with ``str`` and joined with a single space.

    Args:
        intMatr: A 2-D array, or any iterable of rows where each row is
            itself iterable (for example a list of lists).

    Returns:
        A list containing one string per row. An input with no rows
        yields an empty list.
    """
    # Iterate over the rows directly instead of indexing through
    # ``shape[0]``: the result is the same for 2-D arrays, and plain nested
    # sequences (which have no ``shape``) are accepted as well.
    return [' '.join(map(str, row)) for row in intMatr]

def __init__(self, metric, method_name, index_param, query_param):
def __init__(self, metric, object_type, method_name, index_param, query_param):
self._nmslib_metric = {'angular': 'cosinesimil', 'euclidean': 'l2', 'jaccard': 'jaccard_sparse'}[metric]
self._object_type = object_type
self._method_name = method_name
self._save_index = False
self._index_param = NmslibReuseIndex.encode(index_param)
Expand All @@ -53,11 +61,11 @@ class NmslibReuseIndex(BaseANN):
# Aborted (core dumped)
self._index_param.append('bucketSize=%d' % min(int(X.shape[0] * 0.0005), 1000))

# Chunjiang modified it to "if" for jaccard
if self._nmslib_metric == 'jaccard_sparse':
X_trans = NmslibReuseIndex.matrToStrArray(csr_matrix(X))
self._index = nmslib.init(space=self._nmslib_metric, method=self._method_name, data_type=nmslib.DataType.OBJECT_AS_STRING)
self._index.addDataPointBatch(X_trans)
if self._object_type == 'Byte':
X_trans = NmslibReuseIndex.matrToStrArray(csr_matrix(X))
else:
X_trans = NmslibReuseIndex.intMatrToStrArray(X)
else:
self._index = nmslib.init(space=self._nmslib_metric, method=self._method_name)
self._index.addDataPointBatch(X)
Expand All @@ -79,9 +87,12 @@ class NmslibReuseIndex(BaseANN):
def query(self, v, n, rq=False):
# For the jaccard space the query is encoded as a space-separated string (change by Chunjiang)
if self._nmslib_metric == 'jaccard_sparse':
nz = numpy.nonzero(v)[0]
v = ' '.join([str(k) for k in nz])
#print(n)
if self._object_type == 'Byte':
nz = numpy.nonzero(v)[0]
v = ' '.join([str(k) for k in nz])
else:
v = ' '.join([str(k) for k in v])

if rq:
ids, distances = self._index.rangeQuery(v, n)
else:
Expand All @@ -91,7 +102,10 @@ class NmslibReuseIndex(BaseANN):
def batch_query(self, X, n):
    """Run a batched k-NN query and keep the raw result on ``self.res``.

    For the ``jaccard_sparse`` space the queries are converted to strings
    exactly once before being handed to the index: ``'Byte'`` data goes
    through ``matrToStrArray(csr_matrix(X))``, every other object type
    through ``intMatrToStrArray(X)``. For all other spaces ``X`` is passed
    through unchanged.

    Args:
        X: The batch of queries, one query per row.
        n: Passed to ``knnQueryBatch`` as the number of neighbours.

    Returns:
        None. The value returned by ``knnQueryBatch`` is stored in
        ``self.res``.
    """
    if self._nmslib_metric == 'jaccard_sparse':
        # Choose a single encoding based on the object type. The stale
        # unconditional matrToStrArray conversion that preceded this branch
        # is dropped so X is never converted twice.
        if self._object_type == 'Byte':
            X = NmslibReuseIndex.matrToStrArray(csr_matrix(X))
        else:
            X = NmslibReuseIndex.intMatrToStrArray(X)
    self.res = self._index.knnQueryBatch(X, n)

def get_batch_results(self):
Expand Down

0 comments on commit 0e81a7b

Please sign in to comment.