Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Genomics/SemiGlobalAlignment.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
149 lines (127 sloc)
5.41 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scores import * | |
from fasta import readFASTA | |
# this is a work in progress to give "semi-global alignments" | |
class LocalAlignment: | |
def __init__(self,A,B,match_score=None,gap=None): | |
if match_score is None: | |
def match_score(A,B): | |
if A!=B: | |
return 1 | |
else: | |
return -1 | |
self.match_score=match_score | |
if gap is None: | |
def gap(x): | |
return x | |
self.gap=gap | |
self.Rx=None | |
self.Sx=None | |
self.tb=False | |
self.A=A | |
self.B=B | |
self.nA=len(self.A)+1 | |
self.nB=len(self.B)+1 | |
self.dpV=np.zeros((self.nB,self.nA),dtype=int) | |
self.dpG=np.zeros((self.nB,self.nA),dtype=int) | |
self.dpE=np.zeros((self.nB,self.nA),dtype=int) | |
self.dpF=np.zeros((self.nB,self.nA),dtype=int) | |
for i in range(1,self.nA): | |
#print i | |
for j in range(1,self.nB): | |
if (i,j)==(1,1): | |
self.dpG[j][i]=self.match_score(B[0],A[0]) | |
self.dpE[j][i]=0 | |
self.dpF[j][i]=0 | |
if i==1 and j>1: | |
self.dpG[j][i]=self.match_score(B[j-1],A[0]) | |
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1)) | |
self.dpF[j][i]=self.gap(j)+gap(1) | |
if i>1 and j==1: | |
self.dpG[j][i]=self.match_score(B[0],A[i-1]) | |
self.dpE[j][i]=self.gap(i)+gap(1) | |
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1)) | |
if i>1 and j>1: | |
self.dpG[j][i]=self.dpV[j-1][i-1]+self.match_score(B[j-1],A[i-1]) | |
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1)) | |
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1)) | |
self.dpV[j][i]=min(0,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]) | |
if (i,j)==(1,1): | |
self.minV=self.dpV[j][i] | |
self.mini=i | |
self.minj=j | |
else: | |
if self.dpV[j][i]<self.minV: | |
self.minV=self.dpV[j][i] | |
self.mini=i | |
self.minj=j | |
def score(self): | |
return self.minV | |
def aligned(self): | |
if self.tb: | |
return (self.Rx,self.Sx) | |
i=self.mini | |
j=self.minj | |
R=[] | |
S=[] | |
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]]) | |
while i>=1 and j>=1 and self.dpV[j][i]<0: | |
if s==2: | |
R.append(self.A[i-1]) | |
S.append(self.B[j-1]) | |
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | |
i=i-1 | |
j=j-1 | |
elif s==0: | |
R.append('-') | |
S.append(self.B[j-1]) | |
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | |
j=j-1 | |
while self.dpV[j][i]<0 and j>=1 and self.dpE[j][i]+self.gap(j)-self.gap(j-1)<self.dpV[j][i]+self.gap(1): | |
S.append(self.B[j-1]) | |
R.append('-') | |
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | |
j=j-1 | |
elif s==1: | |
S.append('-') | |
R.append(self.A[i-1]) | |
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | |
i=i-1 | |
while self.dpV[j][i]<0 and i>=1 and self.dpF[j][i]+self.gap(i)-self.gap(i-1)<self.dpV[j][i]+self.gap(1): | |
S.append('-') | |
R.append(self.A[i-1]) | |
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | |
i=i-1 | |
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]]) | |
if i==0: | |
while j>0 and self.dpV[j][i]<0: | |
#R.append('-') | |
S.append(self.B[j-1]) | |
j=j-1 | |
elif j==0: | |
while i>0 and self.dpV[j][i]<0: | |
#S.append('-') | |
R.append(self.A[i-1]) | |
i=i-1 | |
self.Rx= ''.join(reversed(R)) | |
self.Sx=''.join(reversed(S)) | |
self.tb=True | |
return(self.Rx,self.Sx) | |
s=readFASTA("rosalind_loca.txt") | |
def pam250_penalty(x,y): | |
return -pam250_score(x,y) | |
def blosum62_penalty(x,y): | |
return -blosum62_score(x,y) | |
def agap(j): | |
return 5*j | |
#print len(s.values()[0]) | |
#print len(s.values()[1]) | |
A=s.values()[0] | |
B=s.values()[1] | |
#A='MEANLYPRTEINSTRING' | |
#B='PLEASANTLYEINSTEIN' | |
X=LocalAlignment(A,B,gap=agap,match_score=pam250_penalty) | |
(R,S)=X.aligned() | |
print -X.score() | |
print R.replace('-','') | |
print S.replace('-','') | |