Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Added many files from Rosalind project and related genomics work.
- Loading branch information
Showing
128 changed files
with
9,018 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import numpy as np | ||
import matplotlib.pylab as plt | ||
|
||
pA,pB,pO=1.0/3,1.0/3,1.0/3 | ||
nA,nB,nAB,nO=25,25,50,15 | ||
|
||
N=nA+nB+nAB+nO | ||
|
||
print pA, pB,pO | ||
|
||
for i in range(20): | ||
|
||
EAA=pA**2*nA/(pA**2+2*pA*pO) | ||
EAO=2*pA*pO*nA/(pA**2+2*pA*pO) | ||
EBB=pB**2*nB/(pB**2+2*pB*pO) | ||
EBO=2*pB*pO*nB/(pB**2+2*pB*pO) | ||
EAB=nAB | ||
EOO=nO | ||
|
||
pA=(2*EAA+EAO+EAB)/(2*N) | ||
pB=(2*EBB+EBO+EAB)/(2*N) | ||
pO=(2*EOO+EAO+EBO)/(2*N) | ||
|
||
like=nA*np.log(pA**2+2*pA*pO)+nB*np.log(pB**2+2*pB*pO)+nAB*np.log(2*pA*pB)+nO*np.log(pO**2) | ||
|
||
print pA, pB,pO,like | ||
|
||
print EAA,EAO,EAB,EBB,EBO,EOO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import operator | ||
import numpy as np | ||
import numpy.random as rm | ||
lsave=0 | ||
#theta=[pi,L11-L14,L21-L24,R11-R14,R21-R24] | ||
A={} | ||
for xx in range(500): | ||
u=np.array([[4,2,2,2],[2,4,2,2],[2,2,4,2],[2,2,2,4]]) | ||
L1=rm.random_sample(4) | ||
L2=rm.random_sample(4) | ||
R1=rm.random_sample(4) | ||
R2=rm.random_sample(4) | ||
L1=L1/sum(L1) | ||
L2=L1/sum(L2) | ||
R1=R1/sum(R1) | ||
R2=R2/sum(R2) | ||
pi=rm.random_sample(1)[0] | ||
N=sum(u.reshape(16,1))[0] | ||
|
||
while True: | ||
FL=np.array([[pi*L1[i]*L2[j] for i in range(4)] for j in range(4)]) | ||
FR=np.array([[(1-pi)*R1[i]*R2[j] for i in range(4)] for j in range(4)]) | ||
F=FL+FR | ||
lobs=sum((u*np.log(F)).reshape(16,1))[0] | ||
uL=u*FL/F | ||
NL=sum(uL.reshape(16,1))[0] | ||
uR=u*FR/F | ||
NR=sum(uR.reshape(16,1))[0] | ||
|
||
pistar=sum(uL.reshape(16,1))[0]/N | ||
L1star=sum(uL.T)/NL | ||
L2star=sum(uL)/NL | ||
R1star=sum(uR.T)/NR | ||
R2star=sum(uR)/NR | ||
|
||
if np.abs(lobs-lsave)<.000001: | ||
A[lobs]=F | ||
break | ||
|
||
R1=R1star | ||
R2=R2star | ||
L2=L2star | ||
L1=L1star | ||
pi=pistar | ||
lsave=lobs | ||
|
||
print 40*A[sorted(A.iterkeys(),reverse=True)[0]] | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import urllib | ||
import urllib2 | ||
from fasta import readFASTA | ||
from collections import OrderedDict | ||
|
||
url = 'http://www.ebi.ac.uk/Tools/services/rest/emboss_water/' | ||
s=readFASTA("rosalind_laff.txt") | ||
param_dict={ | ||
'email':'jeremy.teitelbaum@uconn.edu', | ||
'title':'Local Alignment', | ||
'matrix': 'EBLOSUM62', | ||
'format':'markx3', | ||
'gapopen':15, | ||
'gapext':1, | ||
} | ||
param_dict['asequence']=s.values()[0] | ||
param_dict['sid1']='A' | ||
param_dict['bsequence']=s.values()[1] | ||
param_dict['sid2']='B' | ||
params = urllib.urlencode(param_dict) | ||
|
||
response = urllib2.urlopen(url+'run/', params).read() | ||
while urllib2.urlopen(url+'status/'+response).read()!="FINISHED": | ||
continue | ||
|
||
answer=urllib2.urlopen(url+'resulttypes/'+response).read() | ||
|
||
final=urllib2.urlopen(url+'result/'+response+'/out').read() | ||
ls= [x for x in final.split('\n') if len(x)>0] | ||
value,key="","" | ||
s=OrderedDict({}) | ||
j=0 | ||
for x in ls: | ||
if x[0] in ['#',';']: | ||
#print x | ||
continue | ||
|
||
if x[0]==">": | ||
|
||
if key!="": | ||
s[key]=value | ||
key=x[1:].split()[0]+'-'+str(j) | ||
|
||
j=j+1 | ||
value="" | ||
else: | ||
value=value+x.strip() | ||
|
||
s[key]=value | ||
|
||
R='' | ||
for x in s.values()[0]: | ||
if x!='-': | ||
R+=x | ||
S='' | ||
for x in s.values()[1]: | ||
if x!='-': | ||
S+=x | ||
print R | ||
print S | ||
#print s.values()[1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
|
||
import numpy as np | ||
from pymc import * | ||
import matplotlib.pyplot as plt | ||
from matplotlib.pyplot import scatter,show | ||
H,R=4,0 | ||
x=DiscreteUniform('X',lower=1,upper=4,size=4) | ||
|
||
|
||
@potential | ||
def L(H=4,R=0,x=x): | ||
p=0 | ||
for i in range(1,4): | ||
if x[i]==x[0]: | ||
p+=H | ||
else: | ||
p+=R | ||
return p | ||
|
||
fig,axes=plt.subplots(nrows=2,ncols=1) | ||
|
||
N,t=5000,1 | ||
model=MCMC([x,L]) | ||
model.sample(N,thin=t) | ||
print '!' | ||
#u=x.trace()[:,0]+np.random.normal(0,0.1,N/t) | ||
#v=x.trace()[:,1]+np.random.normal(0,0.1,N/t) | ||
#scatter(u,v) | ||
#show() | ||
z=model.trace('X')[:] | ||
s=0.0 | ||
for n in z: | ||
if n[0]==n[1]: | ||
s+=1 | ||
print s/len(z) | ||
print np.exp(H)/(np.exp(H)+3*np.exp(R)) | ||
u=z[:,0]+np.random.normal(0,.1,len(z[:,0])) | ||
v=z[:,1]+np.random.normal(0,.1,len(z[:,1])) | ||
axes[0].scatter(u,v) | ||
Hval,Rval=5,1 | ||
model.sample(N,thin=t) | ||
|
||
z=model.trace('X')[:] | ||
u=z[:,0]+np.random.normal(0,.1,len(z[:,0])) | ||
v=z[:,1]+np.random.normal(0,.1,len(z[:,1])) | ||
axes[1].scatter(u,v) | ||
|
||
show() | ||
|
||
|
||
|
||
#import pydot | ||
#graph=pydot.Dot(graph_type='graph') | ||
#graph.set_graphviz_executables({'dot':'C:\\Program Files (x86)\Graphviz2.38\\bin\dot.exe'}) | ||
|
||
#import scipy.misc | ||
#pymc.graph.graph(model,path='Desktop',name='my_graph',format='png',prog='C:\Program Files (x86)\Graphviz2.38\\bin\dot.exe') | ||
#plt.figure(figsize=(8,8)) | ||
#plt.imshow(scipy.misc.imread('my_graph.png')) | ||
#plt.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import numpy | ||
|
||
differences=[471,568,520,534,501,499] | ||
n=2205 | ||
pdistances=[differences[i]/n for i in range(6)] | ||
jcdistances=[-3/4*log(1-4/3*pdistances[i]) for i in range(6)] | ||
|
||
print pdistances | ||
print jcdistances |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import numpy as np | ||
from scores import * | ||
from fasta import readFASTA | ||
# this is an implementation of the smith-waterman algorithm | ||
|
||
|
||
class LocalAlignment: | ||
def __init__(self,A,B,match_score=None,gap=None): | ||
if match_score is None: | ||
def match_score(A,B): | ||
if A!=B: | ||
return 1 | ||
else: | ||
return -1 | ||
self.match_score=match_score | ||
if gap is None: | ||
def gap(x): | ||
return x | ||
self.gap=gap | ||
|
||
self.Rx=None | ||
self.Sx=None | ||
self.tb=False | ||
self.A=A | ||
self.B=B | ||
self.nA=len(self.A)+1 | ||
self.nB=len(self.B)+1 | ||
self.dpV=np.zeros((self.nB,self.nA),dtype=int) | ||
self.dpG=np.zeros((self.nB,self.nA),dtype=int) | ||
self.dpE=np.zeros((self.nB,self.nA),dtype=int) | ||
self.dpF=np.zeros((self.nB,self.nA),dtype=int) | ||
for i in range(1,self.nA): | ||
#print i | ||
for j in range(1,self.nB): | ||
if (i,j)==(1,1): | ||
self.dpG[j][i]=self.match_score(B[0],A[0]) | ||
self.dpE[j][i]=0 | ||
self.dpF[j][i]=0 | ||
if i==1 and j>1: | ||
self.dpG[j][i]=self.match_score(B[j-1],A[0]) | ||
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1)) | ||
self.dpF[j][i]=self.gap(j)+gap(1) | ||
if i>1 and j==1: | ||
self.dpG[j][i]=self.match_score(B[0],A[i-1]) | ||
self.dpE[j][i]=self.gap(i)+gap(1) | ||
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1)) | ||
if i>1 and j>1: | ||
self.dpG[j][i]=self.dpV[j-1][i-1]+self.match_score(B[j-1],A[i-1]) | ||
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1)) | ||
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1)) | ||
|
||
self.dpV[j][i]=min(0,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]) | ||
|
||
if (i,j)==(1,1): | ||
self.minV=self.dpV[j][i] | ||
self.mini=i | ||
self.minj=j | ||
else: | ||
if self.dpV[j][i]<self.minV: | ||
self.minV=self.dpV[j][i] | ||
self.mini=i | ||
self.minj=j | ||
|
||
|
||
def score(self): | ||
return self.minV | ||
|
||
def aligned(self): | ||
if self.tb: | ||
return (self.Rx,self.Sx) | ||
i=self.mini | ||
j=self.minj | ||
R=[] | ||
S=[] | ||
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]]) | ||
while i>=1 and j>=1 and self.dpV[j][i]<0: | ||
if s==2: | ||
R.append(self.A[i-1]) | ||
S.append(self.B[j-1]) | ||
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | ||
i=i-1 | ||
j=j-1 | ||
elif s==0: | ||
R.append('-') | ||
S.append(self.B[j-1]) | ||
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | ||
j=j-1 | ||
while self.dpV[j][i]<0 and j>=1 and self.dpE[j][i]+self.gap(j)-self.gap(j-1)<self.dpV[j][i]+self.gap(1): | ||
S.append(self.B[j-1]) | ||
R.append('-') | ||
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | ||
j=j-1 | ||
elif s==1: | ||
S.append('-') | ||
R.append(self.A[i-1]) | ||
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | ||
i=i-1 | ||
while self.dpV[j][i]<0 and i>=1 and self.dpF[j][i]+self.gap(i)-self.gap(i-1)<self.dpV[j][i]+self.gap(1): | ||
S.append('-') | ||
R.append(self.A[i-1]) | ||
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S)) | ||
i=i-1 | ||
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]]) | ||
if i==0: | ||
while j>0 and self.dpV[j][i]<0: | ||
#R.append('-') | ||
S.append(self.B[j-1]) | ||
j=j-1 | ||
|
||
elif j==0: | ||
while i>0 and self.dpV[j][i]<0: | ||
#S.append('-') | ||
R.append(self.A[i-1]) | ||
i=i-1 | ||
|
||
self.Rx= ''.join(reversed(R)) | ||
self.Sx=''.join(reversed(S)) | ||
self.tb=True | ||
return(self.Rx,self.Sx) | ||
|
||
|
||
s=readFASTA("rosalind_loca.txt") | ||
|
||
def pam250_penalty(x,y): | ||
return -pam250_score(x,y) | ||
|
||
def blosum62_penalty(x,y): | ||
return -blosum62_score(x,y) | ||
|
||
def agap(j): | ||
return 5*j | ||
|
||
#print len(s.values()[0]) | ||
#print len(s.values()[1]) | ||
A=s.values()[0] | ||
B=s.values()[1] | ||
|
||
#A='MEANLYPRTEINSTRING' | ||
#B='PLEASANTLYEINSTEIN' | ||
|
||
X=LocalAlignment(A,B,gap=agap,match_score=pam250_penalty) | ||
(R,S)=X.aligned() | ||
print -X.score() | ||
print R.replace('-','') | ||
print S.replace('-','') | ||
|
||
|
||
|
||
|
Oops, something went wrong.