Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
85 lines (64 sloc) 1.87 KB
from fasta import readFASTA
import numpy as np
import string
import operator as op
# this version of the program computes an optimal global alignment
# of two strings (read from the file rosalind_edta.txt in FASTA format)
# using a scoring system that counts gaps and substitutions each with
# penalty 1. In other words it minimizes the edit distance.
#s=readFASTA("rosalind_edta.txt")
#A=s.values()[0]
#B=s.values()[1]
A='CAGCACTTGGATTCTCGG'
B='CAGCGTGG'
nA=len(A)+1
nB=len(B)+1
#dp[j][i] is the minimum edit distance for A[:i] and B[:j]
#note that the range of i,j is zero up to
dp=np.zeros((nB,nA),dtype=int)
pointerx=np.zeros((nB,nA),dtype=int)
pointery=np.zeros((nB,nA),dtype=int)
# initialization
for i in range(nA):
dp[0][i]=i
pointery[0][i]=-1
for j in range(nB):
dp[j][0]=j
pointerx[j][0]=-1
for i in range(1,nA):
for j in range(1,nB):
if A[i-1]==B[j-1]:
z=-1
else:
z=1
#print i,j,A[i-1],B[j-1]
t=[(dp[j-1][i]+1,(-1,0,1)),(dp[j][i-1]+1,(0,-1,1)),(dp[j-1][i-1]+z,(-1,-1,z))]
(t,(dx,dy,z))=min(t,key=lambda x: op.getitem(x,0))
#print t,dx,dy,z
dp[j][i]=dp[j+dx][i+dy]+z
pointerx[j][i],pointery[j][i]=dx,dy
AX=[]
BX=[]
i,j=nA-1,nB-1
while True:
# print i,j,pointery[j][i],pointerx[j][i],
if (pointerx[j][i],pointery[j][i])==(-1,-1):
AX.append(A[i-1])
BX.append(B[j-1])
# print (-1,-1)
elif (pointerx[j][i],pointery[j][i])==(-1,0):
BX.append(B[j-1])
AX.append('-')
# print (-1,0)
elif (pointerx[j][i],pointery[j][i])==(0,-1):
BX.append('-')
AX.append(A[i-1])
# print (0,-1)
# print i,j
i,j=i+pointery[j][i],j+pointerx[j][i]
# print i,j
if i<=0 and j<=0:
break
print dp[nB-1][nA-1]
print ''.join(reversed(AX))
print ''.join(reversed(BX))
You can’t perform that action at this time.