Permalink
Please
sign in to comment.
Showing
with
103 additions
and 0 deletions.
 +91 −0 centerstar.py
 +6 −0 input.txt
 +6 −0 test_inputs.txt
@@ -0,0 +1,91 @@
import sys  
import os  
import itertools  
import numpy as np  




def find_edit_distance(v, w):
    '''
    Finds the edit distance between v and w.

    Edit graph is set up with v along the rows (one row per prefix of v)
    and w along the columns (one column per prefix of w).
    Score 0 for match, 1 for mismatch or indel.

    Returns the distance between the two strings, i.e. the value in the
    bottom-right cell of the edit graph. The graph is built with
    np.zeros, so the value comes back as a numpy float.
    '''
    # n, m are number of rows, cols
    # Note we need the +1 because we need to add the 0th row and column
    # (the empty prefix) as our starting point
    n = len(v) + 1
    m = len(w) + 1

    # Init graph
    edit_graph = np.zeros((n, m))

    # Distances along the first column / first row:
    # reaching a prefix of length k from the empty string costs k indels
    for i in range(n):
        edit_graph[i, 0] = i
    for j in range(m):
        edit_graph[0, j] = j

    # Go through the graph
    for i in range(1, n):
        for j in range(1, m):

            # Diagonal move: free on a match, +1 on a mismatch.
            # Row i / column j correspond to characters v[i - 1] / w[j - 1]
            # because row 0 and column 0 stand for the empty prefix.
            if v[i - 1] == w[j - 1]:
                diag = edit_graph[i - 1, j - 1]
            else:
                diag = edit_graph[i - 1, j - 1] + 1

            # Best of: deletion (from above), insertion (from left), diagonal
            edit_graph[i, j] = min(
                edit_graph[i - 1, j] + 1,
                edit_graph[i, j - 1] + 1,
                diag,
            )

    # The bottom-right cell holds the distance between the full strings
    distance = edit_graph[n - 1, m - 1]
    return distance






def main():
    '''
    Main Function

    Reads the input file named by the first command-line argument. The
    first line of the file is the number of strings n; the following
    lines are the strings themselves. For every unordered pair among the
    first n strings, the edit distance is computed and added to the
    running score of both strings in the pair.
    '''
    # Try to get file as input
    try:
        input_file = sys.argv[1]
    except IndexError:
        # Parenthesised single-argument print is valid on Python 2 and 3
        print("Please supply an input file")
        sys.exit()

    # Read in n, strings to S
    with open(input_file, 'r') as f:
        content = f.readlines()

    n = int(content[0].strip())
    S = [s.strip() for s in content[1:]]

    # Init scoring table to find center string candidate
    scores = [0] * n

    # Score every index pair; each distance counts toward both members.
    # combinations() yields two distinct indices, so updating them
    # directly is the same as scanning all n indices for membership.
    for first, second in itertools.combinations(range(n), 2):
        distance = find_edit_distance(S[first], S[second])
        scores[first] += distance
        scores[second] += distance

    # NOTE(review): scores is filled in but not consumed anywhere in this
    # function yet - presumably the center-string selection is still to
    # be written; confirm before relying on any output.






# Run main() only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()

@@ -0,0 +1,6 @@
4  
AXZ  
AXXZ  
AYXYZ  
AYZ  

@@ -0,0 +1,6 @@
5  
CCTGCTGCAG  
GATGTGCCG  
GATGTGCAG  
CCGCTAGCAG  
CCTGTAGG 
0 comments on commit
6003609