Added many files from Rosalind project and related genomics work.

jet08013 · May 13, 2015 · e8980d5 · e8980d5
1 parent 9ece619
commit e8980d5
Show file tree

Hide file tree

Showing 128 changed files with 9,018 additions and 0 deletions.
diff --git a/ABO.py b/ABO.py
@@ -0,0 +1,28 @@
+import numpy as np
+import matplotlib.pylab as plt
+
+pA,pB,pO=1.0/3,1.0/3,1.0/3
+nA,nB,nAB,nO=25,25,50,15
+
+N=nA+nB+nAB+nO
+
+print pA, pB,pO
+
+for i in range(20):
+
+    EAA=pA**2*nA/(pA**2+2*pA*pO)
+    EAO=2*pA*pO*nA/(pA**2+2*pA*pO)
+    EBB=pB**2*nB/(pB**2+2*pB*pO)
+    EBO=2*pB*pO*nB/(pB**2+2*pB*pO)
+    EAB=nAB
+    EOO=nO
+
+    pA=(2*EAA+EAO+EAB)/(2*N)
+    pB=(2*EBB+EBO+EAB)/(2*N)
+    pO=(2*EOO+EAO+EBO)/(2*N)
+
+    like=nA*np.log(pA**2+2*pA*pO)+nB*np.log(pB**2+2*pB*pO)+nAB*np.log(2*pA*pB)+nO*np.log(pO**2)
+
+    print pA, pB,pO,like
+
+print EAA,EAO,EAB,EBB,EBO,EOO
diff --git a/EM.py b/EM.py
@@ -0,0 +1,58 @@
+import operator
+import numpy as np
+import numpy.random as rm
+lsave=0
+#theta=[pi,L11-L14,L21-L24,R11-R14,R21-R24]
+A={}
+for xx in range(500):
+    u=np.array([[4,2,2,2],[2,4,2,2],[2,2,4,2],[2,2,2,4]])
+    L1=rm.random_sample(4)
+    L2=rm.random_sample(4)
+    R1=rm.random_sample(4)
+    R2=rm.random_sample(4)
+    L1=L1/sum(L1)
+    L2=L1/sum(L2)
+    R1=R1/sum(R1)
+    R2=R2/sum(R2)
+    pi=rm.random_sample(1)[0]
+    N=sum(u.reshape(16,1))[0]
+
+    while True:
+        FL=np.array([[pi*L1[i]*L2[j] for i in range(4)] for j in range(4)])
+        FR=np.array([[(1-pi)*R1[i]*R2[j] for i in range(4)] for j in range(4)])
+        F=FL+FR
+        lobs=sum((u*np.log(F)).reshape(16,1))[0]
+        uL=u*FL/F
+        NL=sum(uL.reshape(16,1))[0]
+        uR=u*FR/F
+        NR=sum(uR.reshape(16,1))[0]
+
+        pistar=sum(uL.reshape(16,1))[0]/N
+        L1star=sum(uL.T)/NL
+        L2star=sum(uL)/NL
+        R1star=sum(uR.T)/NR
+        R2star=sum(uR)/NR
+
+        if np.abs(lobs-lsave)<.000001:
+            A[lobs]=F
+            break
+
+        R1=R1star
+        R2=R2star
+        L2=L2star
+        L1=L1star
+        pi=pistar
+        lsave=lobs
+
+print 40*A[sorted(A.iterkeys(),reverse=True)[0]]
+
+
+
+
+
+
+
+
+
+
+
diff --git a/EMBOSSwater.py b/EMBOSSwater.py
@@ -0,0 +1,63 @@
+import urllib
+import urllib2
+from fasta import readFASTA
+from collections import OrderedDict
+
+url = 'http://www.ebi.ac.uk/Tools/services/rest/emboss_water/'
+s=readFASTA("rosalind_laff.txt")
+param_dict={
+ 'email':'jeremy.teitelbaum@uconn.edu',
+ 'title':'Local Alignment',
+ 'matrix': 'EBLOSUM62',
+ 'format':'markx3',
+ 'gapopen':15,
+ 'gapext':1,
+}
+param_dict['asequence']=s.values()[0]
+param_dict['sid1']='A'
+param_dict['bsequence']=s.values()[1]
+param_dict['sid2']='B'
+params = urllib.urlencode(param_dict)
+
+response = urllib2.urlopen(url+'run/', params).read()
+while urllib2.urlopen(url+'status/'+response).read()!="FINISHED":
+    continue
+
+answer=urllib2.urlopen(url+'resulttypes/'+response).read()
+
+final=urllib2.urlopen(url+'result/'+response+'/out').read()
+ls= [x for x in final.split('\n') if len(x)>0]
+value,key="",""
+s=OrderedDict({})
+j=0
+for x in ls: 
+    if x[0] in ['#',';']:
+        #print x
+        continue  
+
+    if x[0]==">":
+
+        if key!="":
+            s[key]=value
+        key=x[1:].split()[0]+'-'+str(j)
+
+        j=j+1
+        value=""
+    else:
+        value=value+x.strip()
+
+    s[key]=value
+
+R=''
+for x in s.values()[0]:
+    if x!='-':
+        R+=x
+S=''
+for x in s.values()[1]:
+    if x!='-':
+        S+=x
+print R
+print
+print S
+#print 
+#print s.values()[1]
diff --git a/Experiments.py b/Experiments.py
@@ -0,0 +1,61 @@
+
+import numpy as np
+from pymc import *
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import scatter,show
+H,R=4,0
+x=DiscreteUniform('X',lower=1,upper=4,size=4)
+
+
+@potential
+def L(H=4,R=0,x=x):
+    p=0
+    for i in range(1,4):
+        if x[i]==x[0]:
+            p+=H
+        else:
+            p+=R      
+    return p 
+
+fig,axes=plt.subplots(nrows=2,ncols=1)
+
+N,t=5000,1
+model=MCMC([x,L])
+model.sample(N,thin=t)   
+print '!'    
+#u=x.trace()[:,0]+np.random.normal(0,0.1,N/t)
+#v=x.trace()[:,1]+np.random.normal(0,0.1,N/t)
+#scatter(u,v)
+#show()
+z=model.trace('X')[:]
+s=0.0
+print
+for n in z:
+    if n[0]==n[1]:
+        s+=1
+print s/len(z)
+print np.exp(H)/(np.exp(H)+3*np.exp(R))
+u=z[:,0]+np.random.normal(0,.1,len(z[:,0]))
+v=z[:,1]+np.random.normal(0,.1,len(z[:,1]))
+axes[0].scatter(u,v)
+Hval,Rval=5,1
+model.sample(N,thin=t)       
+
+z=model.trace('X')[:]
+u=z[:,0]+np.random.normal(0,.1,len(z[:,0]))
+v=z[:,1]+np.random.normal(0,.1,len(z[:,1]))
+axes[1].scatter(u,v)
+
+show()
+
+
+
+#import pydot
+#graph=pydot.Dot(graph_type='graph')
+#graph.set_graphviz_executables({'dot':'C:\\Program Files (x86)\Graphviz2.38\\bin\dot.exe'})
+
+#import scipy.misc
+#pymc.graph.graph(model,path='Desktop',name='my_graph',format='png',prog='C:\Program Files (x86)\Graphviz2.38\\bin\dot.exe')
+#plt.figure(figsize=(8,8))
+#plt.imshow(scipy.misc.imread('my_graph.png'))
+#plt.close()
diff --git a/LewisHW3Distances.py b/LewisHW3Distances.py
@@ -0,0 +1,9 @@
+import numpy
+
+differences=[471,568,520,534,501,499]
+n=2205
+pdistances=[differences[i]/n for i in range(6)]
+jcdistances=[-3/4*log(1-4/3*pdistances[i]) for i in range(6)]
+
+print pdistances
+print jcdistances
diff --git a/LocalAlignment.py b/LocalAlignment.py
@@ -0,0 +1,149 @@
+import numpy as np
+from scores import *
+from fasta import readFASTA
+# this is an implementation of the smith-waterman algorithm
+
+
+class LocalAlignment:
+    def __init__(self,A,B,match_score=None,gap=None): 
+        if match_score is None:
+            def match_score(A,B):
+                if A!=B:
+                    return 1
+                else:
+                    return -1
+        self.match_score=match_score
+        if gap is None:
+            def gap(x):
+                return x
+        self.gap=gap
+
+        self.Rx=None
+        self.Sx=None
+        self.tb=False 
+        self.A=A
+        self.B=B  
+        self.nA=len(self.A)+1
+        self.nB=len(self.B)+1
+        self.dpV=np.zeros((self.nB,self.nA),dtype=int)
+        self.dpG=np.zeros((self.nB,self.nA),dtype=int)
+        self.dpE=np.zeros((self.nB,self.nA),dtype=int)
+        self.dpF=np.zeros((self.nB,self.nA),dtype=int)
+        for i in range(1,self.nA):
+            #print i
+            for j in range(1,self.nB):          
+                if (i,j)==(1,1): 
+                    self.dpG[j][i]=self.match_score(B[0],A[0])           
+                    self.dpE[j][i]=0   
+                    self.dpF[j][i]=0           
+                if i==1 and j>1:
+                    self.dpG[j][i]=self.match_score(B[j-1],A[0])
+                    self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))         
+                    self.dpF[j][i]=self.gap(j)+gap(1)                              
+                if i>1 and j==1: 
+                    self.dpG[j][i]=self.match_score(B[0],A[i-1])        
+                    self.dpE[j][i]=self.gap(i)+gap(1)
+                    self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))                   
+                if i>1 and j>1:
+                    self.dpG[j][i]=self.dpV[j-1][i-1]+self.match_score(B[j-1],A[i-1])
+                    self.dpE[j][i]=min(self.dpE[j-1][i]+self.gap(j)-self.gap(j-1),self.dpV[j-1][i]+self.gap(1))
+                    self.dpF[j][i]=min(self.dpF[j][i-1]+self.gap(i)-self.gap(i-1),self.dpV[j][i-1]+self.gap(1))               
+
+                self.dpV[j][i]=min(0,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i])
+
+                if (i,j)==(1,1):
+                    self.minV=self.dpV[j][i]
+                    self.mini=i
+                    self.minj=j
+                else:
+                    if self.dpV[j][i]<self.minV:
+                        self.minV=self.dpV[j][i]
+                        self.mini=i
+                        self.minj=j
+
+
+    def score(self):
+        return self.minV
+
+    def aligned(self):
+        if self.tb:
+            return (self.Rx,self.Sx)        
+        i=self.mini
+        j=self.minj
+        R=[]
+        S=[]
+        s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])           
+        while i>=1 and j>=1 and self.dpV[j][i]<0:                       
+            if s==2:
+                R.append(self.A[i-1])
+                S.append(self.B[j-1])
+                #print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
+                i=i-1
+                j=j-1
+            elif s==0:
+                R.append('-')
+                S.append(self.B[j-1])
+                #print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
+                j=j-1
+                while   self.dpV[j][i]<0 and j>=1  and self.dpE[j][i]+self.gap(j)-self.gap(j-1)<self.dpV[j][i]+self.gap(1):
+                    S.append(self.B[j-1])
+                    R.append('-') 
+                    #print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
+                    j=j-1
+            elif s==1:
+                S.append('-')
+                R.append(self.A[i-1])
+                #print '**',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
+                i=i-1
+                while  self.dpV[j][i]<0 and i>=1 and self.dpF[j][i]+self.gap(i)-self.gap(i-1)<self.dpV[j][i]+self.gap(1):
+                    S.append('-')
+                    R.append(self.A[i-1]) 
+                    #print '*',i,j,s,self.dpE[j][i],self.dpF[j][i],self.dpG[j][i],''.join(reversed(R)),''.join(reversed(S))
+                    i=i-1                    
+            s=np.argmin([self.dpE[j][i],self.dpF[j][i],self.dpG[j][i]])                   
+        if i==0:
+            while j>0 and self.dpV[j][i]<0:
+                #R.append('-')
+                S.append(self.B[j-1])
+                j=j-1   
+
+        elif j==0:
+            while i>0 and self.dpV[j][i]<0:
+                #S.append('-')
+                R.append(self.A[i-1])
+                i=i-1
+
+        self.Rx= ''.join(reversed(R))
+        self.Sx=''.join(reversed(S))
+        self.tb=True
+        return(self.Rx,self.Sx)
+
+
+s=readFASTA("rosalind_loca.txt")
+
+def pam250_penalty(x,y):
+    return -pam250_score(x,y)
+
+def blosum62_penalty(x,y):
+    return -blosum62_score(x,y)
+
+def agap(j):
+    return 5*j
+
+#print len(s.values()[0])
+#print len(s.values()[1])
+A=s.values()[0]
+B=s.values()[1]
+
+#A='MEANLYPRTEINSTRING'
+#B='PLEASANTLYEINSTEIN'
+
+X=LocalAlignment(A,B,gap=agap,match_score=pam250_penalty)
+(R,S)=X.aligned()
+print -X.score()
+print R.replace('-','')
+print S.replace('-','')
+
+
+
+