Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#This software is a free software. Thus, it is licensed under GNU General Public License version 3 or later.
#Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class.
#Forrest Bao, Sept. 26, 2007 <http://fsbao.net> <forrest.bao aT gmail.com>
import sys, string
from random import *
from numpy import *
from matplotlib import *
alpha='cagt'
#read the first sequence
seq1=''
seq2=''
for i in range(6):
seq1=seq1+choice(alpha);
for i in range(20):
seq2=seq2+choice(alpha);
print seq1, "\n", seq2
m,n = len(seq1),len(seq2) #length of two sequences
#generate DP table and traceback path pointer matrix
score=zeros((m+1,n+1)) #the DP table
pointer=zeros((m+1,n+1)) #to store the traceback path
P=0;
penalty=-1
def match_score(alpha,beta):
if alpha==beta:
return 2
else:
return penalty
max_score=P; #initial maximum score in DP table
#calculate DP table and mark pointers
for i in range(1,m+1):
for j in range(1,n+1):
score_up=score[i-1][j]+penalty ;
score_down=score[i][j-1]+penalty ;
score_diagonal=score[i-1][j-1]+match_score(seq1[i-1],seq2[j-1]);
score[i][j]=max(0,score_up,score_down,score_diagonal);
if score[i][j]==0:
pointer[i][j]=0; #0 means end of the path
if score[i][j]==score_up:
pointer[i][j]=1; #1 means trace up
if score[i][j]==score_down:
pointer[i][j]=2; #2 means trace left
if score[i][j]==score_diagonal:
pointer[i][j]=3; #3 means trace diagonal
if score[i][j]>=max_score:
max_i=i;
max_j=j;
max_score=score[i][j];
#END of DP table
for i in range(m+1):
for j in range(n+1):
print '{0:4d}'.format(int(score[i,j])), '{0:4d}'.format(int(pointer[i][j])), '|',
print
align1,align2='',''; #initial sequences
i,j=max_i,max_j; #indices of path starting point
#missing beginning and ending for this routine
#traceback, follow pointers
while pointer[i][j]!=0:
if pointer[i][j]==3:
align1=align1+seq1[i-1];
align2=align2+seq2[j-1];
i=i-1;
j=j-1;
elif pointer[i][j]==2:
align1=align1+'-';
align2=align2+seq2[j-1];
j=j-1;
elif pointer[i][j]==1:
align1=align1+seq1[i-1];
align2=align2+'-';
i=i-1;
#END of traceback
align1=align1[::-1]; #reverse sequence 1
align2=align2[::-1]; #reverse sequence 2
i,j=0,0;
#calcuate identity, similarity, score and aligned sequeces
def result(align1,align2):
symbol='';
found=0;
score=0;
identity,similarity=0,0;
for i in range(0,len(align1)):
if align1[i]==align2[i]: #if two AAs are the same, then output the letter
symbol=symbol+align1[i];
identity=identity+1;
score=score+match_score(align1[i],align2[i]);
elif align1[i]=='-' or align2[i]=='-': #if one of them is a gap, output a space
symbol=symbol+' ';
score=score+match_score(align1[i],align2[i]);
print 'length of common subsequence = ', identity
print 'Score =', score;
print align1
print align2
print symbol
#END of function result
result(align1,align2)