Skip to content
Permalink
6d5cc671f5
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
84 lines (64 sloc) 1.87 KB
from fasta import readFASTA
import numpy as np
import string
import operator as op
# this version of the program computes an optimal global alignment
# of two strings (read from the file rosalind_edta.txt in FASTA format)
# using a scoring system that counts gaps and substitutions each with
# penalty 1. In other words it minimizes the edit distance.
#s=readFASTA("rosalind_edta.txt")
#A=s.values()[0]
#B=s.values()[1]
A='CAGCACTTGGATTCTCGG'
B='CAGCGTGG'
nA=len(A)+1
nB=len(B)+1
#dp[j][i] is the minimum edit distance for A[:i] and B[:j]
#note that the range of i,j is zero up to
dp=np.zeros((nB,nA),dtype=int)
pointerx=np.zeros((nB,nA),dtype=int)
pointery=np.zeros((nB,nA),dtype=int)
# initialization
for i in range(nA):
dp[0][i]=i
pointery[0][i]=-1
for j in range(nB):
dp[j][0]=j
pointerx[j][0]=-1
for i in range(1,nA):
for j in range(1,nB):
if A[i-1]==B[j-1]:
z=-1
else:
z=1
#print i,j,A[i-1],B[j-1]
t=[(dp[j-1][i]+1,(-1,0,1)),(dp[j][i-1]+1,(0,-1,1)),(dp[j-1][i-1]+z,(-1,-1,z))]
(t,(dx,dy,z))=min(t,key=lambda x: op.getitem(x,0))
#print t,dx,dy,z
dp[j][i]=dp[j+dx][i+dy]+z
pointerx[j][i],pointery[j][i]=dx,dy
AX=[]
BX=[]
i,j=nA-1,nB-1
while True:
# print i,j,pointery[j][i],pointerx[j][i],
if (pointerx[j][i],pointery[j][i])==(-1,-1):
AX.append(A[i-1])
BX.append(B[j-1])
# print (-1,-1)
elif (pointerx[j][i],pointery[j][i])==(-1,0):
BX.append(B[j-1])
AX.append('-')
# print (-1,0)
elif (pointerx[j][i],pointery[j][i])==(0,-1):
BX.append('-')
AX.append(A[i-1])
# print (0,-1)
# print i,j
i,j=i+pointery[j][i],j+pointerx[j][i]
# print i,j
if i<=0 and j<=0:
break
print dp[nB-1][nA-1]
print ''.join(reversed(AX))
print ''.join(reversed(BX))