Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
113 lines (102 sloc) 4.74 KB
import numpy as np
# this is an implementation of the needleman-wunsch algorithm
# and it should handle the affine gap situation
# the match_score algorithm should score positive numbers for mismatches
# the gap function should score positive numbers for gaps
# the returned value should be AS SMALL AS POSSIBLE so negative numbers
# are "good"
class GlobalAlignment:
def __init__(self,A,B,match_score=None,gap=None):
if match_score is None:
def match_score(A,B):
if A!=B:
return 1
else:
return -1
self.match_score=match_score
if gap is None:
def gap(x):
return x
self.gap=gap
self.Rx=None
self.Sx=None
self.tb=False
self.A=A
self.B=B
self.nA=len(self.A)+1
self.nB=len(self.B)+1
self.dpV=np.zeros((self.nB,self.nA),dtype=int)
self.dpG=np.zeros((self.nB,self.nA),dtype=int)
self.dpE=np.zeros((self.nB,self.nA),dtype=int)
self.dpF=np.zeros((self.nB,self.nA),dtype=int)
for i in range(1,self.nA):
for j in range(1,self.nB):
if (i,j)==(1,1):
self.dpG[j][i]=self.match_score(B[0],A[0])
self.dpE[j][i]=self.gap(1)+self.gap(1)
self.dpF[j][i]=self.gap(1)+self.gap(1)
if i==1 and j>1:
self.dpG[j][i]=self.match_score(B[j-1],A[0])+self.gap(j-1)
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))
self.dpF[j][i]=self.gap(j)+self.gap(1)
if i>1 and j==1:
self.dpG[j][i]=self.match_score(B[0],A[i-1])+self.gap(i-1)
self.dpE[j][i]=self.gap(i)+self.gap(1)
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))
if i>1 and j>1:
self.dpG[j][i]=self.dpV[j-1][i-1]+self.match_score(B[j-1],A[i-1])
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))
self.dpV[j][i]=min(self.dpE[j][i],self.dpF[j][i],self.dpG[j][i])
def score(self):
return self.dpV[self.nB-1][self.nA-1]
def aligned(self):
if self.tb:
return (self.Rx,self.Sx)
i=self.nA-1
j=self.nB-1
R=[]
S=[]
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])
while i>=1 and j>=1:
if s==2:
R.append(self.A[i-1])
S.append(self.B[j-1])
# print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
j=j-1
elif s==0:
R.append('-')
S.append(self.B[j-1])
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
j=j-1
while j>=1 and self.dpE[j][i]+self.gap(j)-self.gap(j-1)<self.dpV[j][i]+self.gap(1):
S.append(self.B[j-1])
R.append('-')
# print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
j=j-1
elif s==1:
S.append('-')
R.append(self.A[i-1])
# print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
while i>=1 and self.dpF[j][i]+self.gap(i)-self.gap(i-1)<self.dpV[j][i]+self.gap(1):
S.append('-')
R.append(self.A[i-1])
# print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])
if i==0:
while j>0:
R.append('-')
S.append(self.B[j-1])
j=j-1
elif j==0:
while i>0:
S.append('-')
R.append(self.A[i-1])
i=i-1
self.Rx= ''.join(reversed(R))
self.Sx=''.join(reversed(S))
self.tb=True
return(self.Rx,self.Sx)
You can’t perform that action at this time.