Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added many files from Rosalind project and related genomics work.
  • Loading branch information
jet08013 committed May 13, 2015
1 parent 9ece619 commit e8980d5
Show file tree
Hide file tree
Showing 128 changed files with 9,018 additions and 0 deletions.
28 changes: 28 additions & 0 deletions ABO.py
@@ -0,0 +1,28 @@
import numpy as np
import matplotlib.pylab as plt

pA,pB,pO=1.0/3,1.0/3,1.0/3
nA,nB,nAB,nO=25,25,50,15

N=nA+nB+nAB+nO

print pA, pB,pO

for i in range(20):

EAA=pA**2*nA/(pA**2+2*pA*pO)
EAO=2*pA*pO*nA/(pA**2+2*pA*pO)
EBB=pB**2*nB/(pB**2+2*pB*pO)
EBO=2*pB*pO*nB/(pB**2+2*pB*pO)
EAB=nAB
EOO=nO

pA=(2*EAA+EAO+EAB)/(2*N)
pB=(2*EBB+EBO+EAB)/(2*N)
pO=(2*EOO+EAO+EBO)/(2*N)

like=nA*np.log(pA**2+2*pA*pO)+nB*np.log(pB**2+2*pB*pO)+nAB*np.log(2*pA*pB)+nO*np.log(pO**2)

print pA, pB,pO,like

print EAA,EAO,EAB,EBB,EBO,EOO
58 changes: 58 additions & 0 deletions EM.py
@@ -0,0 +1,58 @@
import operator
import numpy as np
import numpy.random as rm
lsave=0
#theta=[pi,L11-L14,L21-L24,R11-R14,R21-R24]
A={}
for xx in range(500):
u=np.array([[4,2,2,2],[2,4,2,2],[2,2,4,2],[2,2,2,4]])
L1=rm.random_sample(4)
L2=rm.random_sample(4)
R1=rm.random_sample(4)
R2=rm.random_sample(4)
L1=L1/sum(L1)
L2=L1/sum(L2)
R1=R1/sum(R1)
R2=R2/sum(R2)
pi=rm.random_sample(1)[0]
N=sum(u.reshape(16,1))[0]

while True:
FL=np.array([[pi*L1[i]*L2[j] for i in range(4)] for j in range(4)])
FR=np.array([[(1-pi)*R1[i]*R2[j] for i in range(4)] for j in range(4)])
F=FL+FR
lobs=sum((u*np.log(F)).reshape(16,1))[0]
uL=u*FL/F
NL=sum(uL.reshape(16,1))[0]
uR=u*FR/F
NR=sum(uR.reshape(16,1))[0]

pistar=sum(uL.reshape(16,1))[0]/N
L1star=sum(uL.T)/NL
L2star=sum(uL)/NL
R1star=sum(uR.T)/NR
R2star=sum(uR)/NR

if np.abs(lobs-lsave)<.000001:
A[lobs]=F
break

R1=R1star
R2=R2star
L2=L2star
L1=L1star
pi=pistar
lsave=lobs

print 40*A[sorted(A.iterkeys(),reverse=True)[0]]











63 changes: 63 additions & 0 deletions EMBOSSwater.py
@@ -0,0 +1,63 @@
import urllib
import urllib2
from fasta import readFASTA
from collections import OrderedDict

url = 'http://www.ebi.ac.uk/Tools/services/rest/emboss_water/'
s=readFASTA("rosalind_laff.txt")
param_dict={
'email':'jeremy.teitelbaum@uconn.edu',
'title':'Local Alignment',
'matrix': 'EBLOSUM62',
'format':'markx3',
'gapopen':15,
'gapext':1,
}
param_dict['asequence']=s.values()[0]
param_dict['sid1']='A'
param_dict['bsequence']=s.values()[1]
param_dict['sid2']='B'
params = urllib.urlencode(param_dict)

response = urllib2.urlopen(url+'run/', params).read()
while urllib2.urlopen(url+'status/'+response).read()!="FINISHED":
continue

answer=urllib2.urlopen(url+'resulttypes/'+response).read()

final=urllib2.urlopen(url+'result/'+response+'/out').read()
ls= [x for x in final.split('\n') if len(x)>0]
value,key="",""
s=OrderedDict({})
j=0
for x in ls:
if x[0] in ['#',';']:
#print x
continue

if x[0]==">":

if key!="":
s[key]=value
key=x[1:].split()[0]+'-'+str(j)

j=j+1
value=""
else:
value=value+x.strip()

s[key]=value

R=''
for x in s.values()[0]:
if x!='-':
R+=x
S=''
for x in s.values()[1]:
if x!='-':
S+=x
print R
print
print S
#print
#print s.values()[1]
61 changes: 61 additions & 0 deletions Experiments.py
@@ -0,0 +1,61 @@

import numpy as np
from pymc import *
import matplotlib.pyplot as plt
from matplotlib.pyplot import scatter,show
H,R=4,0
x=DiscreteUniform('X',lower=1,upper=4,size=4)


@potential
def L(H=4,R=0,x=x):
p=0
for i in range(1,4):
if x[i]==x[0]:
p+=H
else:
p+=R
return p

fig,axes=plt.subplots(nrows=2,ncols=1)

N,t=5000,1
model=MCMC([x,L])
model.sample(N,thin=t)
print '!'
#u=x.trace()[:,0]+np.random.normal(0,0.1,N/t)
#v=x.trace()[:,1]+np.random.normal(0,0.1,N/t)
#scatter(u,v)
#show()
z=model.trace('X')[:]
s=0.0
print
for n in z:
if n[0]==n[1]:
s+=1
print s/len(z)
print np.exp(H)/(np.exp(H)+3*np.exp(R))
u=z[:,0]+np.random.normal(0,.1,len(z[:,0]))
v=z[:,1]+np.random.normal(0,.1,len(z[:,1]))
axes[0].scatter(u,v)
Hval,Rval=5,1
model.sample(N,thin=t)

z=model.trace('X')[:]
u=z[:,0]+np.random.normal(0,.1,len(z[:,0]))
v=z[:,1]+np.random.normal(0,.1,len(z[:,1]))
axes[1].scatter(u,v)

show()



#import pydot
#graph=pydot.Dot(graph_type='graph')
#graph.set_graphviz_executables({'dot':'C:\\Program Files (x86)\Graphviz2.38\\bin\dot.exe'})

#import scipy.misc
#pymc.graph.graph(model,path='Desktop',name='my_graph',format='png',prog='C:\Program Files (x86)\Graphviz2.38\\bin\dot.exe')
#plt.figure(figsize=(8,8))
#plt.imshow(scipy.misc.imread('my_graph.png'))
#plt.close()
9 changes: 9 additions & 0 deletions LewisHW3Distances.py
@@ -0,0 +1,9 @@
import numpy

differences=[471,568,520,534,501,499]
n=2205
pdistances=[differences[i]/n for i in range(6)]
jcdistances=[-3/4*log(1-4/3*pdistances[i]) for i in range(6)]

print pdistances
print jcdistances
149 changes: 149 additions & 0 deletions LocalAlignment.py
@@ -0,0 +1,149 @@
import numpy as np
from scores import *
from fasta import readFASTA
# this is an implementation of the smith-waterman algorithm


class LocalAlignment:
def __init__(self,A,B,match_score=None,gap=None):
if match_score is None:
def match_score(A,B):
if A!=B:
return 1
else:
return -1
self.match_score=match_score
if gap is None:
def gap(x):
return x
self.gap=gap

self.Rx=None
self.Sx=None
self.tb=False
self.A=A
self.B=B
self.nA=len(self.A)+1
self.nB=len(self.B)+1
self.dpV=np.zeros((self.nB,self.nA),dtype=int)
self.dpG=np.zeros((self.nB,self.nA),dtype=int)
self.dpE=np.zeros((self.nB,self.nA),dtype=int)
self.dpF=np.zeros((self.nB,self.nA),dtype=int)
for i in range(1,self.nA):
#print i
for j in range(1,self.nB):
if (i,j)==(1,1):
self.dpG[j][i]=self.match_score(B[0],A[0])
self.dpE[j][i]=0
self.dpF[j][i]=0
if i==1 and j>1:
self.dpG[j][i]=self.match_score(B[j-1],A[0])
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))
self.dpF[j][i]=self.gap(j)+gap(1)
if i>1 and j==1:
self.dpG[j][i]=self.match_score(B[0],A[i-1])
self.dpE[j][i]=self.gap(i)+gap(1)
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))
if i>1 and j>1:
self.dpG[j][i]=self.dpV[j-1][i-1]+self.match_score(B[j-1],A[i-1])
self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))
self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))

self.dpV[j][i]=min(0,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i])

if (i,j)==(1,1):
self.minV=self.dpV[j][i]
self.mini=i
self.minj=j
else:
if self.dpV[j][i]<self.minV:
self.minV=self.dpV[j][i]
self.mini=i
self.minj=j


def score(self):
return self.minV

def aligned(self):
if self.tb:
return (self.Rx,self.Sx)
i=self.mini
j=self.minj
R=[]
S=[]
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])
while i>=1 and j>=1 and self.dpV[j][i]<0:
if s==2:
R.append(self.A[i-1])
S.append(self.B[j-1])
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
j=j-1
elif s==0:
R.append('-')
S.append(self.B[j-1])
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
j=j-1
while self.dpV[j][i]<0 and j>=1 and self.dpE[j][i]+self.gap(j)-self.gap(j-1)<self.dpV[j][i]+self.gap(1):
S.append(self.B[j-1])
R.append('-')
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
j=j-1
elif s==1:
S.append('-')
R.append(self.A[i-1])
#print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
while self.dpV[j][i]<0 and i>=1 and self.dpF[j][i]+self.gap(i)-self.gap(i-1)<self.dpV[j][i]+self.gap(1):
S.append('-')
R.append(self.A[i-1])
#print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
i=i-1
s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])
if i==0:
while j>0 and self.dpV[j][i]<0:
#R.append('-')
S.append(self.B[j-1])
j=j-1

elif j==0:
while i>0 and self.dpV[j][i]<0:
#S.append('-')
R.append(self.A[i-1])
i=i-1

self.Rx= ''.join(reversed(R))
self.Sx=''.join(reversed(S))
self.tb=True
return(self.Rx,self.Sx)


s=readFASTA("rosalind_loca.txt")

def pam250_penalty(x,y):
return -pam250_score(x,y)

def blosum62_penalty(x,y):
return -blosum62_score(x,y)

def agap(j):
return 5*j

#print len(s.values()[0])
#print len(s.values()[1])
A=s.values()[0]
B=s.values()[1]

#A='MEANLYPRTEINSTRING'
#B='PLEASANTLYEINSTEIN'

X=LocalAlignment(A,B,gap=agap,match_score=pam250_penalty)
(R,S)=X.aligned()
print -X.score()
print R.replace('-','')
print S.replace('-','')




0 comments on commit e8980d5

Please sign in to comment.