diff --git a/algos.yaml b/algos.yaml
index 888d60e..864bcbb 100644
--- a/algos.yaml
+++ b/algos.yaml
@@ -2,14 +2,14 @@ bit:
   jaccard:
     Bruteforce:
       disabled: false
-      docker-tag: ann-benchmarks-sklearn
-      singularity-tag: ann-bench-sklearn
+      docker-tag: ann-benchmarks-chemfp
+      singularity-tag: ann-bench-chemfp
       module: ann_benchmarks.algorithms.bruteforce
-      constructor: BruteForceBLAS
+      constructor: BruteForceFPS
       base-args: ["@metric"]
       run-groups:
-        base:
-          args: {}
+        empty:
+          args: []
     Balltree(Sklearn):
       disabled: false
       docker-tag: ann-benchmarks-sklearn
diff --git a/ann_benchmarks/.DS_Store b/ann_benchmarks/.DS_Store
index ff82d8f..5bdd401 100644
Binary files a/ann_benchmarks/.DS_Store and b/ann_benchmarks/.DS_Store differ
diff --git a/ann_benchmarks/algorithms/bruteforce.py b/ann_benchmarks/algorithms/bruteforce.py
index afebcb6..6c5c2ad 100644
--- a/ann_benchmarks/algorithms/bruteforce.py
+++ b/ann_benchmarks/algorithms/bruteforce.py
@@ -96,3 +96,67 @@ def fix(index):
             ev = v
             return (index, pd[self._metric]['distance'](ep, ev))
         return map(fix, indices)
+
+
+class BruteForceFPS(BaseANN):
+    """Exact Jaccard (Tanimoto) search over chemfp fingerprint arenas.
+
+    The runner calls pre_fit and pre_query before it starts its timers,
+    so converting bit matrices into chemfp arenas is not measured.
+    """
+
+    def __init__(self, metric):
+        if metric != 'jaccard':
+            raise NotImplementedError("BruteForce doesn't support metric %s" % metric)
+        self._metric = metric
+        self.name = 'BruteForceFPS()'
+
+    def fit(self, X):
+        """Keep a reference to the training matrix (query reads its row count)."""
+        self.index = X
+
+    def query(self, v, n):
+        """Return the ids of the (at most) n most similar target fingerprints.
+
+        v is unused: the comparison uses the fingerprint that the preceding
+        pre_query call stored in self._queries.
+        """
+        import heapq
+        import operator
+        import chemfp
+        # The query fingerprint is loop-invariant, so fetch it only once.
+        query_fp = self._queries.get_fingerprint(0)
+        scores = [
+            (j, chemfp.bitops.byte_tanimoto(query_fp, self._target.get_fingerprint(j)))
+            for j in range(self.index.shape[0])
+        ]
+        # nlargest matches sorted(..., reverse=True)[:n], tie order included,
+        # and yields fewer than n pairs when fewer exist, so a large n no
+        # longer raises IndexError.
+        best = heapq.nlargest(n, scores, key=operator.itemgetter(1))
+        return [j for j, _ in best]
+
+    @staticmethod
+    def matrToArena(X):
+        """Convert a 2-D bit matrix into an in-memory chemfp fingerprint arena.
+
+        Row i becomes the fingerprint with id i; reorder=False asks chemfp
+        not to re-sort the arena, so position j still refers to row j.
+        """
+        import chemfp
+        from bitarray import bitarray
+        fps = []
+        for row in range(X.shape[0]):
+            fp = bitarray(endian='big')
+            fp.extend(X[row])
+            fps.append((row, fp.tobytes()))
+        return chemfp.load_fingerprints(fps, chemfp.Metadata(num_bits=X.shape[1]), reorder=False)
+
+    def pre_fit(self, X):
+        """Build the target arena from the training matrix."""
+        self._target = BruteForceFPS.matrToArena(X)
+
+    def pre_query(self, v, n):
+        """Build the single-fingerprint arena that the next query call reads."""
+        queryMatr = numpy.array([v])
+        self._queries = BruteForceFPS.matrToArena(queryMatr)
diff --git a/ann_benchmarks/runner.py b/ann_benchmarks/runner.py
index 7f17dfc..f840c6b 100644
--- a/ann_benchmarks/runner.py
+++ b/ann_benchmarks/runner.py
@@ -35,7 +35,7 @@ def run_individual_query(algoname, algo, X_train, X_test, distance, count, run_c
 
         def single_query(v):
             # special code for the Risc, DivideSkip, and Chemfp
-            if algoname in ['Risc', 'DivideSkip', 'Chemfp']:
+            if algoname in ['Risc', 'DivideSkip', 'Chemfp', 'Bruteforce']:
                 algo.pre_query(v, count)
 
             start = time.time()
@@ -130,7 +130,7 @@ def run(definition, dataset, count, run_count, batch, rq):
         print(X_train.shape)
         algo.pre_fit(X_train)
         # special code for Chemfp
-        if definition.algorithm in 'Chemfp':
+        if definition.algorithm in ['Chemfp', 'Bruteforce']:
             algo.pre_fit(X_train)
 
         t0 = time.time()
@@ -302,7 +302,7 @@ def run_singularity(definition, dataset, count, runs, timeout, batch, rq, radius
     print('String of command', strCmd)
 
     # Chemfp uses Python2 while others use Python3
-    if definition.algorithm in 'Chemfp':
+    if definition.algorithm in ['Chemfp', 'Bruteforce']:
         subprocess.check_call('singularity exec %s/%s.sif python run_algorithm.py %s' %(sif_dir, definition.singularity_tag, strCmd), shell=True)
     else:
         subprocess.check_call('singularity exec %s/%s.sif python3 run_algorithm.py %s' %(sif_dir, definition.singularity_tag, strCmd), shell=True)