Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
125 lines (97 sloc) 3.07 KB
#This software is a free software. Thus, it is licensed under GNU General Public License version 3 or later.
#Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class.
#Forrest Bao, Sept. 26, 2007 <http://fsbao.net> <forrest.bao aT gmail.com>
from random import *
alpha='cagt'
#read the first sequence
seq1=''
seq2=''
for i in range(6):
seq1=seq1+choice(alpha);
for i in range(15):
seq2=seq2+choice(alpha);
seq1='gggggg'
seq2='ggctttggtcacg'
print seq1, "\n", seq2
m,n = len(seq1),len(seq2) #length of two sequences
#generate DP table and traceback path pointer matrix
score=[[float(0) for j in range(n+1)] for i in range(m+1)]
pointer=[[float(0) for j in range(n+1)] for i in range(m+1)]
P=0;
penalty=0
def match_score(alpha,beta):
if alpha==beta:
return 10
else:
return -8
max_score=P; #initial maximum score in DP table
#calculate DP table and mark pointers
for i in range(1,m+1):
for j in range(1,n+1):
score_up=score[i-1][j]+penalty
score_down=score[i][j-1]+penalty
score_diagonal=score[i-1][j-1]+match_score(seq1[i-1],seq2[j-1]);
score[i][j]=max(0.0,score_up,score_down,score_diagonal);
if score[i][j]==0:
pointer[i][j]=0; #0 means end of the path
if score[i][j]==score_up:
pointer[i][j]=1; #1 means trace up
if score[i][j]==score_down:
pointer[i][j]=2; #2 means trace left
if score[i][j]==score_diagonal:
print i,j, score[i][j]
pointer[i][j]=3; #3 means trace diagonal
if score[i][j]>=max_score:
print i,j,score[i][j],max_score
max_i=i;
max_j=j;
max_score=score[i][j];
#END of DP table
for i in range(m+1):
for j in range(n+1):
print '{0:2f}'.format(score[i][j]), '{0:2d}'.format(int(pointer[i][j])), '|',
print
align1,align2='',''; #initial sequences
i,j=max_i,max_j;
print "Found:", max_i, max_j, score[i][j], pointer[i][j]
#indices of path starting point
#missing beginning and ending for this routine
#traceback, follow pointers
while pointer[i][j]!=0:
if pointer[i][j]==3:
align1=align1+seq1[i-1];
align2=align2+seq2[j-1];
i=i-1;
j=j-1;
elif pointer[i][j]==2:
align1=align1+'-';
align2=align2+seq2[j-1];
j=j-1;
elif pointer[i][j]==1:
align1=align1+seq1[i-1];
align2=align2+'-';
i=i-1;
#END of traceback
align1=align1[::-1]; #reverse sequence 1
align2=align2[::-1]; #reverse sequence 2
i,j=0,0;
#calcuate identity, similarity, score and aligned sequeces
def result(align1,align2):
symbol='';
score=0;
identity,similarity=0,0;
for i in range(0,len(align1)):
if align1[i]==align2[i]: #if two AAs are the same, then output the letter
symbol=symbol+align1[i];
identity=identity+1;
score=score+match_score(align1[i],align2[i]);
elif align1[i]=='-' or align2[i]=='-': #if one of them is a gap, output a space
symbol=symbol+' ';
score=score+penalty
print 'length of common subsequence = ', identity
print 'Score =', score;
print align1
print align2
print symbol
#END of function result
result(align1,align2)
You can’t perform that action at this time.