Permalink

Fetching contributors…

Cannot retrieve contributors at this time

from fasta import readFASTA | |

import numpy as np | |

import string | |

import operator as op | |

# The key to this algorithm is that the LCS of A[0..n] and B[0..m] | |

# can be computed from the LCS of A[0..(n-1)] and B[0..(m-1)] | |

# if A[n]==B[m] then the LCS of A[0..n] and B[0..m] is the LCS | |

# of A[0..(n-1)] and B[0..(m-1)] with A[n] (B[m]) appended -- then length | |

# goes up by one. If A[n]!=B[m] then the LCS of the two is the longer | |

# the lcs of A[0..(n-1)],B[m] and A[0..n] and B[0..(m-1)]. | |

# note that this algorithm is very close to Needleman-Wunsch and Smith-Waterman, | |

# just the scoring part is different. | |

s=readFASTA("rosalind_lcsq.txt") | |

A=s.values()[0] | |

B=s.values()[1] | |

#A='AACCTTGG' | |

#B='ACACTGTGA' | |

nA=len(A) | |

nB = len(B) | |

dp=np.zeros((nB,nA)) | |

pointerx=np.zeros((nB,nA),dtype=int) | |

pointery=np.zeros((nB,nA),dtype=int) | |

savedp=0 | |

savedi=0 | |

savedj=0 | |

if A[0]==B[0]: | |

dp[0][0]=1 | |

for i in range(1,nA): | |

pointerx[0][i]=-1 | |

if B[0]==A[i]: | |

dp[0][i]=1 | |

savedi=i | |

savedp=1 | |

pointerx[0][i]=-1 | |

pointery[0][i]=-1 | |

else: | |

dp[0][i]=dp[0][i-1] | |

pointerx[0][i]=-1 | |

for j in range(1,nB): | |

pointery[j][0]=-1 | |

if B[j]==A[0]: | |

dp[j][0]=1 | |

savedj=j | |

savedp=1 | |

pointerx[j][0]=-1 | |

pointery[j][0]=-1 | |

else: | |

dp[j][0]=dp[j-1][0] | |

pointery[j][0]=-1 | |

for i in range(1,nA): | |

for j in range(1,nB): | |

if A[i]==B[j]: | |

dp[j][i]=dp[j-1][i-1]+1 | |

(dx,dy)=(-1,-1) | |

else: | |

t=[(dp[j-1][i],(-1,0)),(dp[j][i-1],(0,-1))] | |

(t,(dx,dy))=max(t,key=lambda x: op.getitem(x,0)) | |

dp[j][i]=dp[j+dx][i+dy] | |

pointerx[j][i],pointery[j][i]=dx,dy | |

if dp[j][i]>savedp: | |

savedp=dp[j][i] | |

savedi,savedj=i,j | |

answer=[] | |

i,j = savedi,savedj | |

while True: | |

## print i,j,pointerx[j][i],pointery[j][i],dp[j][i] | |

if pointerx[j][i]==pointery[j][i] and pointerx[j][i]==-1: | |

answer.append(A[i]) | |

i,j=i+pointery[j][i],j+pointerx[j][i] | |

if i<0 or j<0: | |

break | |

print ''.join(reversed(answer)) | |

##### This version came from the Rosalind Web Site | |

##### it preserves only the last row of the table, and the | |

##### current row; but it has to keep the corresponding longest subsequences | |

#S, T = open('rosalind_lcsq.txt').read().splitlines() | |

#cur = [''] * (len(T) + 1) #dummy entries as per wiki | |

#for s in A: | |

# last, cur = cur, [''] | |

# for i, t in enumerate(B): | |

# cur.append(last[i] + s if s==t else max(last[i+1], cur[-1], key=len)) | |

#print cur[-1] | |