Skip to content
Permalink
Browse files

Got edit distance working

  • Loading branch information...
Moria
Moria committed Oct 14, 2015
1 parent 014ef2b commit 600360961249509937446227a4967eaa721921ab
Showing with 103 additions and 0 deletions.
  1. +91 −0 centerstar.py
  2. +6 −0 input.txt
  3. +6 −0 test_inputs.txt
@@ -0,0 +1,91 @@
import sys
import os
import itertools
import numpy as np


def find_edit_distance(v, w):
    '''
    Finds the edit distance (Levenshtein distance) between v and w.

    The edit graph has one row per prefix of v (v runs down the left side)
    and one column per prefix of w (w runs along the top).
    Score 0 for match, 1 for mismatch or indel.

    Only two rows of the graph are kept at a time, because every cell
    depends solely on the row above it and on the cell to its left.

    v, w: the two sequences to compare (typically strings); either may be empty
    Returns the distance between the two strings as a plain int
    '''
    # Row 0: turning the empty prefix of v into w[:j] costs j insertions
    previous_row = list(range(len(w) + 1))

    for i, v_char in enumerate(v, start=1):
        # Column 0: turning v[:i] into the empty string costs i deletions
        current_row = [i]

        for j, w_char in enumerate(w, start=1):
            # Diagonal move is free on a match, costs 1 on a mismatch
            diag = previous_row[j - 1] + (0 if v_char == w_char else 1)
            # Vertical and horizontal moves are indels, always +1
            up = previous_row[j] + 1
            left = current_row[j - 1] + 1

            current_row.append(min(up, left, diag))

        previous_row = current_row

    # Last cell of the last row is the bottom-right corner of the graph
    return previous_row[-1]



def main():
    '''
    Main Function

    Reads the input file named by the first command-line argument and
    accumulates, for each of the first n strings, the sum of its edit
    distances to the other strings. These sums are the scores used to
    find a center string candidate.

    Expected file layout: the first line holds the integer n, and every
    following line holds one string.

    Exits with status 1 if no input file was supplied.
    '''

    # Try to get file as input
    try:
        input_file = sys.argv[1]
    except IndexError:
        # Parenthesised form prints the same text on Python 2 and Python 3
        print("Please supply an input file")
        sys.exit(1)

    # Read in n, strings to S
    with open(input_file, 'r') as f:
        content = f.readlines()

    n = int(content[0].strip())
    S = [s.strip() for s in content[1:]]

    # Init scoring table to find center string candidate
    scores = [0] * n

    # Score every unordered pair of string indices exactly once; the
    # distance of a pair counts towards the totals of both of its members
    for first, second in itertools.combinations(range(n), 2):
        distance = find_edit_distance(S[first], S[second])

        # Update scores
        scores[first] += distance
        scores[second] += distance

    # NOTE(review): scores is neither printed nor returned yet, so the
    # result of the scoring pass is currently discarded.



# Run the program only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()

@@ -0,0 +1,6 @@
4
AXZ
AXXZ
AYXYZ
AYZ

@@ -0,0 +1,6 @@
5
CCTGCTGCAG
GATGTGCCG
GATGTGCAG
CCGCTAGCAG
CCTGTAGG

0 comments on commit 6003609

Please sign in to comment.
You can’t perform that action at this time.