Skip to content

Commit

Permalink
add BruteforceFPS class
Browse files Browse the repository at this point in the history
  • Loading branch information
ChunjiangZhu committed Jul 3, 2020
1 parent 4795f3e commit eff89a1
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 8 deletions.
10 changes: 5 additions & 5 deletions algos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ bit:
jaccard:
Bruteforce:
disabled: false
docker-tag: ann-benchmarks-sklearn
singularity-tag: ann-bench-sklearn
docker-tag: ann-benchmarks-chemfp
singularity-tag: ann-bench-chemfp
module: ann_benchmarks.algorithms.bruteforce
constructor: BruteForceBLAS
constructor: BruteForceFPS
base-args: ["@metric"]
run-groups:
base:
args: {}
empty:
args: []
Balltree(Sklearn):
disabled: false
docker-tag: ann-benchmarks-sklearn
Expand Down
Binary file modified ann_benchmarks/.DS_Store
Binary file not shown.
40 changes: 40 additions & 0 deletions ann_benchmarks/algorithms/bruteforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,43 @@ def fix(index):
ev = v
return (index, pd[self._metric]['distance'](ep, ev))
return map(fix, indices)

class BruteForceFPS(BaseANN):
    """Exhaustive Tanimoto search over chemfp fingerprint arenas.

    Only the 'jaccard' metric is accepted. The search itself relies on two
    preparation hooks that the caller must run before ``query``:

    * ``pre_fit(X)`` converts the training matrix into a chemfp arena
      stored in ``self._target``;
    * ``pre_query(v, n)`` converts the single query vector into a one-row
      arena stored in ``self._queries``.

    ``fit(X)`` only keeps a reference to ``X``; ``query`` uses its
    ``shape[0]`` as the number of candidates to scan.
    """

    def __init__(self, metric):
        """Validate the metric and set the algorithm name.

        Raises:
            NotImplementedError: if ``metric`` is not 'jaccard'.
        """
        if metric != 'jaccard':
            raise NotImplementedError("BruteForce doesn't support metric %s" % metric)
        self._metric = metric
        self.name = 'BruteForceFPS()'

    def fit(self, X):
        """Remember the training matrix; no index structure is built here."""
        self.index = X

    def query(self, v, n):
        """Return the row indices of the (at most) ``n`` best matches.

        ``v`` is not read here: the query fingerprint is taken from
        ``self._queries``, which ``pre_query`` must have populated.
        Candidates are ranked by descending ``byte_tanimoto`` score; ties
        keep ascending row order because the sort is stable.

        Returns:
            A list of row indices, best match first. It is shorter than
            ``n`` when fewer than ``n`` rows are indexed, and empty when
            ``n`` is not positive.
        """
        import operator
        import chemfp.bitops

        if n <= 0:
            return []

        byte_tanimoto = chemfp.bitops.byte_tanimoto
        # The query fingerprint does not change across candidates, so fetch
        # it once instead of once per row.
        query_fp = self._queries.get_fingerprint(0)
        target = self._target

        # Build (row, score) pairs in row order so tie-breaking is
        # deterministic regardless of dict ordering.
        scored = [
            (j, byte_tanimoto(query_fp, target.get_fingerprint(j)))
            for j in range(self.index.shape[0])
        ]
        best = sorted(scored, key=operator.itemgetter(1), reverse=True)[:n]
        # Iterate over what the slice actually holds: indexing range(n)
        # would raise IndexError when n exceeds the number of rows.
        return [row for row, _ in best]

    @staticmethod
    def matrToArena(X):
        """Convert the rows of matrix ``X`` into an in-memory chemfp arena.

        Each row is packed into a big-endian ``bitarray`` and stored as
        bytes under its row number as the fingerprint id.
        """
        import chemfp
        from bitarray import bitarray

        fps = []
        for row in range(X.shape[0]):
            fp = bitarray(endian='big')
            fp.extend(X[row])
            fps.append((row, fp.tobytes()))
        # NOTE(review): reorder=False presumably keeps arena positions equal
        # to row numbers, which query() relies on when it calls
        # get_fingerprint(j) - confirm against the chemfp documentation.
        return chemfp.load_fingerprints(
            fps, chemfp.Metadata(num_bits=X.shape[1]), reorder=False)

    def pre_fit(self, X):
        """Build the target arena from the training matrix ``X``."""
        self._target = BruteForceFPS.matrToArena(X)

    def pre_query(self, v, n):
        """Build a one-row query arena from vector ``v``; ``n`` is unused."""
        queryMatr = numpy.array([v])
        self._queries = BruteForceFPS.matrToArena(queryMatr)
6 changes: 3 additions & 3 deletions ann_benchmarks/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run_individual_query(algoname, algo, X_train, X_test, distance, count, run_c

def single_query(v):
# special code for the Risc, DivideSkip, and Chemfp
if algoname in ['Risc', 'DivideSkip', 'Chemfp']:
if algoname in ['Risc', 'DivideSkip', 'Chemfp', 'Bruteforce']:
algo.pre_query(v, count)

start = time.time()
Expand Down Expand Up @@ -130,7 +130,7 @@ def run(definition, dataset, count, run_count, batch, rq):
print(X_train.shape)
algo.pre_fit(X_train)
# special code for Chemfp
if definition.algorithm in 'Chemfp':
if definition.algorithm in ['Chemfp', 'Bruteforce']:
algo.pre_fit(X_train)

t0 = time.time()
Expand Down Expand Up @@ -302,7 +302,7 @@ def run_singularity(definition, dataset, count, runs, timeout, batch, rq, radius
print('String of command', strCmd)

# Chemfp uses Python2 while others use Python3
if definition.algorithm in 'Chemfp':
if definition.algorithm in ['Chemfp', 'Bruteforce']:
subprocess.check_call('singularity exec %s/%s.sif python run_algorithm.py %s' %(sif_dir, definition.singularity_tag, strCmd), shell=True)
else:
subprocess.check_call('singularity exec %s/%s.sif python3 run_algorithm.py %s' %(sif_dir, definition.singularity_tag, strCmd), shell=True)
Expand Down

0 comments on commit eff89a1

Please sign in to comment.