Skip to content

Commit

Permalink
updated..
Browse files Browse the repository at this point in the history
  • Loading branch information
sun13005 committed Apr 29, 2017
1 parent 9bff746 commit 49c9e03
Show file tree
Hide file tree
Showing 7 changed files with 556 additions and 133 deletions.
File renamed without changes.
4 changes: 2 additions & 2 deletions readseq.py → readSeq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ def patterns():
from collections import Counter

script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
path = os.path.join(script_dir, 'nexus')
# path = os.path.join(script_dir, 'nexus')

genes = []
data = {}
# print 'Reading nexus files...'
for filename in glob.glob(os.path.join(path, '*.nex')):
for filename in glob.glob(os.path.join(script_dir, '*.nex')):

m = re.match('(.+).nex', os.path.basename(filename))
gene_name = m.group(1)
Expand Down
22 changes: 22 additions & 0 deletions readtree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# def treenewck():
# script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
# path = os.path.join(script_dir, 'nexus')
# for filename in glob.glob(os.path.join(path, '*.tre*')):
# f = open(filename, 'r').read()

import re, os, glob, itertools, fnmatch, sys, shutil
# dirname, filename = os.path.split(os.path.abspath(__file__))
def treenewick():
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
# print script_dir
path = os.path.join(script_dir, 'tree.tre')
with open(path, 'r') as content:
newick = content.read()
return newick
a = treenewick()
print a


# dirname, filename = os.path.split(os.path.abspath(__file__))
# print "running from", dirname
# print "file is", filename
20 changes: 20 additions & 0 deletions simulated.nex

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tree.tre
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)
242 changes: 111 additions & 131 deletions treeLike.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,133 +19,7 @@ def __init__(self, ndnum): # initialization function
self.descendants = set([ndnum]) # set containing descendant leaf set
self.partial = None # will have length 4*npatterns

def allocatePartial(self, patterns):
# r = 1.0
rate_list = [0.03338775, 0.25191592, 0.82026848, 2.89442785]
# rate_list = [1.0, 1.0, 1.0, 1.0]

if self.number > 0:
npatterns = len(patterns)
self.partial = [0.0]*(4*npatterns)
for i,pattern in enumerate(patterns.keys()):
base = pattern[self.number-1]
if base == 'A':
self.partial[i*4 + 0] = 1.0
elif base == 'C':
self.partial[i*4 + 1] = 1.0
elif base == 'G':
self.partial[i*4 + 2] = 1.0
elif base == 'T':
self.partial[i*4 + 3] = 1.0
else:
assert(False), 'oops, something went horribly wrong!'

else:
npatterns = len(patterns)
self.partial = [0.0]*(4*npatterns)
like_list = []
for i,pattern in enumerate(patterns.keys()):
site_s = 0.0
# site_s2 = 0.0
sites_s = []
for r in rate_list:
psame = (0.25+0.75*exp(-4.0*r*(self.lchild.edgelen)/3.0))
pdiff = (0.25-0.25*exp(-4.0*r*(self.lchild.edgelen)/3.0))

psame2 = (0.25+0.75*exp(-4.0*r*(self.lchild.rsib.edgelen)/3.0))
pdiff2 = (0.25-0.25*exp(-4.0*r*(self.lchild.rsib.edgelen)/3.0))



num_pattern = patterns[pattern]
# print 'num_pattern=', num_pattern

pAA = psame*(self.lchild.partial[i*4 + 0])
pAC = pdiff*(self.lchild.partial[i*4 + 1])
pAG = pdiff*(self.lchild.partial[i*4 + 2])
pAT = pdiff*(self.lchild.partial[i*4 + 3])

pAA2 = psame2*(self.lchild.rsib.partial[i*4 + 0])
pAC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1])
pAG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2])
pAT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3])

pfromA_lchild = pAA+pAC+pAG+pAT
pfromA_rchild = pAA2+pAC2+pAG2+pAT2
self.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild

######################################################

pCA = pdiff*(self.lchild.partial[i*4 + 0])
pCC = psame*(self.lchild.partial[i*4 + 1])
pCG = pdiff*(self.lchild.partial[i*4 + 2])
pCT = pdiff*(self.lchild.partial[i*4 + 3])

pCA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0])
pCC2 = psame2*(self.lchild.rsib.partial[i*4 + 1])
pCG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2])
pCT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3])

pfromC_lchild = pCA+pCC+pCG+pCT
pfromC_rchild = pCA2+pCC2+pCG2+pCT2
self.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild

#######################################################
#
pGA = pdiff*(self.lchild.partial[i*4 + 0])
pGC = pdiff*(self.lchild.partial[i*4 + 1])
pGG = psame*(self.lchild.partial[i*4 + 2])
pGT = pdiff*(self.lchild.partial[i*4 + 3])
#
pGA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0])
pGC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1])
pGG2 = psame2*(self.lchild.rsib.partial[i*4 + 2])
pGT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3])
#
pfromG_lchild = pGA+pGC+pGG+pGT
pfromG_rchild = pGA2+pGC2+pGG2+pGT2
self.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild

#######################################################

pTA = pdiff*(self.lchild.partial[i*4 + 0])
pTC = pdiff*(self.lchild.partial[i*4 + 1])
pTG = pdiff*(self.lchild.partial[i*4 + 2])
pTT = psame*(self.lchild.partial[i*4 + 3])
#
pTA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0])
pTC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1])
pTG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2])
pTT2 = psame2*(self.lchild.rsib.partial[i*4 + 3])
#
pfromT_lchild = pTA+pTC+pTG+pTT
pfromT_rchild = pTA2+pTC2+pTG2+pTT2
self.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild

#########################################################

site_like = sum(self.partial[i*4:i*4+4])*0.25
# site_log_like2 = ((sum(self.partial[i*4:i*4+4]))*0.25)

# print 'site_log_like=', site_log_like
site_s += site_like*0.25
print 'site_s=', site_s
# site_s2 += log(site_log_like2)*0.25
# print 'site_s2=', site_s2

# sites_s.append(site_log_like)
# print 'site_s=', site_s
# print 'sites_s=', sum(sites_s), sites_s
site_log_like = log(site_s)*num_pattern
like_list.append(site_log_like)
# print 'like_list=', like_list

log_Like = sum(like_list)
# print like_list
print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

return log_Like
# print 'log-like=', log_Like

def __str__(self):
# __str__ is a built-in function that is used by print to show an object
descendants_as_string = ','.join(['%d' % d for d in self.descendants])
Expand All @@ -163,12 +37,118 @@ def __str__(self):
parstr = '%d' % self.par.number

return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string)

def allocatePartial(node, patterns):
if node.number > 0:
npatterns = len(patterns)
node.partial = [0.0]*(4*npatterns)
for i,pattern in enumerate(patterns.keys()):
base = pattern[node.number-1]
if base == 'A':
node.partial[i*4 + 0] = 1.0
elif base == 'C':
node.partial[i*4 + 1] = 1.0
elif base == 'G':
node.partial[i*4 + 2] = 1.0
elif base == 'T':
node.partial[i*4 + 3] = 1.0
else:
assert(False), 'oops, something went horribly wrong!'

else:
npatterns = len(patterns)
node.partial = [0.0]*(4*npatterns)
like_list = []
for i,pattern in enumerate(patterns.keys()):
psame = (0.25+0.75*exp(-4.0*(node.lchild.edgelen)/3.0))
pdiff = (0.25-0.25*exp(-4.0*(node.lchild.edgelen)/3.0))

psame2 = (0.25+0.75*exp(-4.0*(node.lchild.rsib.edgelen)/3.0))
pdiff2 = (0.25-0.25*exp(-4.0*(node.lchild.rsib.edgelen)/3.0))

num_pattern = patterns[pattern]

pAA = psame*(node.lchild.partial[i*4 + 0])
pAC = pdiff*(node.lchild.partial[i*4 + 1])
pAG = pdiff*(node.lchild.partial[i*4 + 2])
pAT = pdiff*(node.lchild.partial[i*4 + 3])

pAA2 = psame2*(node.lchild.rsib.partial[i*4 + 0])
pAC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1])
pAG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2])
pAT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3])

pfromA_lchild = pAA+pAC+pAG+pAT
pfromA_rchild = pAA2+pAC2+pAG2+pAT2
node.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild

######################################################

pCA = pdiff*(node.lchild.partial[i*4 + 0])
pCC = psame*(node.lchild.partial[i*4 + 1])
pCG = pdiff*(node.lchild.partial[i*4 + 2])
pCT = pdiff*(node.lchild.partial[i*4 + 3])

pCA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0])
pCC2 = psame2*(node.lchild.rsib.partial[i*4 + 1])
pCG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2])
pCT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3])

pfromC_lchild = pCA+pCC+pCG+pCT
pfromC_rchild = pCA2+pCC2+pCG2+pCT2
node.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild

#######################################################
#
pGA = pdiff*(node.lchild.partial[i*4 + 0])
pGC = pdiff*(node.lchild.partial[i*4 + 1])
pGG = psame*(node.lchild.partial[i*4 + 2])
pGT = pdiff*(node.lchild.partial[i*4 + 3])
#
pGA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0])
pGC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1])
pGG2 = psame2*(node.lchild.rsib.partial[i*4 + 2])
pGT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3])
#
pfromG_lchild = pGA+pGC+pGG+pGT
pfromG_rchild = pGA2+pGC2+pGG2+pGT2
node.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild

#######################################################

pTA = pdiff*(node.lchild.partial[i*4 + 0])
pTC = pdiff*(node.lchild.partial[i*4 + 1])
pTG = pdiff*(node.lchild.partial[i*4 + 2])
pTT = psame*(node.lchild.partial[i*4 + 3])
#
pTA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0])
pTC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1])
pTG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2])
pTT2 = psame2*(node.lchild.rsib.partial[i*4 + 3])
#
pfromT_lchild = pTA+pTC+pTG+pTT
pfromT_rchild = pTA2+pTC2+pTG2+pTT2
node.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild

#########################################################

site_log_like = (log((sum(node.partial[i*4:i*4+4]))*0.25))*num_pattern
like_list.append(site_log_like)

log_Like = sum(like_list)
return log_Like

def treenewick():
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
path = os.path.join(script_dir, 'tree.tre')
with open(path, 'r') as content:
newick = content.read()
return newick

def prepareTree(postorder, patterns):
likelihood_lists = []
for nd in postorder:
likelihood_lists.append(nd.allocatePartial(patterns))
likelihood_lists.append(allocatePartial(nd, patterns))
print 'log-likelihood of the topology =', likelihood_lists[-1]

def joinRandomPair(node_list, next_node_number, is_deep_coalescence):
Expand Down Expand Up @@ -381,7 +361,7 @@ def calcExpectedHeight(num_species, mu_over_s):
# print ' newick: ',newick[0]


yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)'
# yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)'

postorder = readnewick(yuletree)
result = prepareTree(postorder, readSeq.patterns())
postorder = readnewick(treenewick())
result = prepareTree(postorder, readSeq.patterns())
Loading

0 comments on commit 49c9e03

Please sign in to comment.