diff --git a/ann_benchmarks/.DS_Store b/ann_benchmarks/.DS_Store index 5bdd401..f4d2c77 100644 Binary files a/ann_benchmarks/.DS_Store and b/ann_benchmarks/.DS_Store differ diff --git a/ann_benchmarks/algorithms/chemfp.py b/ann_benchmarks/algorithms/chemfp.py index e700a6e..3a6f29e 100644 --- a/ann_benchmarks/algorithms/chemfp.py +++ b/ann_benchmarks/algorithms/chemfp.py @@ -32,11 +32,16 @@ def pre_query(self, v, n): queryMatr = numpy.array([v]) self._queries = Chemfp.matrToArena(queryMatr) - def query(self, v, n): - self._results = chemfp.knearest_tanimoto_search(self._queries, self._target, k=n, threshold=0.0) + def query(self, v, n, rq=False): + if rq: + self._results = chemfp.threshold_tanimoto_search(self._queries, self._target, threshold=1.0-n) + else: + self._results = chemfp.knearest_tanimoto_search(self._queries, self._target, k=n, threshold=0.0) - def post_query(self): + def post_query(self, rq=False): # parse the results for (query_id, hits) in self._results: if hits: return hits.get_ids() + else: + return [] diff --git a/ann_benchmarks/plotting/metrics.py b/ann_benchmarks/plotting/metrics.py index cdf2800..fe3f0d0 100644 --- a/ann_benchmarks/plotting/metrics.py +++ b/ann_benchmarks/plotting/metrics.py @@ -13,7 +13,7 @@ def rangequery(dataset_distances, run_distances, radius, epsilon=1e-10): for true_distances, found_distances in zip(dataset_distances, run_distances): true = [d for d in true_distances if d <= radius + epsilon] found = [d for d in found_distances if d <= radius + epsilon] - print('found: ' + str(len(found)) + '/true: ' + str(len(true))) + #print('found: ' + str(len(found)) + '/true: ' + str(len(true))) if len(true) == 0: if len(found) == 0: total += 1.0 diff --git a/ann_benchmarks/runner.py b/ann_benchmarks/runner.py index f840c6b..9acaa4d 100644 --- a/ann_benchmarks/runner.py +++ b/ann_benchmarks/runner.py @@ -47,7 +47,7 @@ def single_query(v): # special code for the Risc, DivideSkip, and Chemfp if algoname in ['Risc', 'DivideSkip', 'Chemfp']: - candidates = algo.post_query() + candidates = algo.post_query(rq) if issparse(X_train): candidates = [(int(idx), float(metrics[distance]['distance'](v, X_train[idx].toarray()[0])))