Skip to content

Commit

Permalink
fixed some issues
Browse files Browse the repository at this point in the history
  • Loading branch information
sun13005 committed May 9, 2017
1 parent 2bd003a commit cfbd353
Showing 1 changed file with 68 additions and 68 deletions.
136 changes: 68 additions & 68 deletions brnlenMCMC.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
##################################
##########################################################################################
# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree
# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model
###################################
# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model,
# and performs MCMC on branch length parameter
##########################################################################################


import readSeq
Expand All @@ -10,6 +11,18 @@
from itertools import chain
from scipy.stats import gamma
from math import exp, log


##########################################################################################
alpha = 0.5 #gamma shape parameter for rate categories
n_gen = 50000
save_every = 50
mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior
##########################################################################################




class node(object):
def __init__(self, ndnum): # initialization function
self.rsib = None # right sibling
Expand All @@ -21,8 +34,6 @@ def __init__(self, ndnum): # initialization function
self.partial = None # will have length 4*npatterns




def __str__(self):
# __str__ is a built-in function that is used by print to show an object
descendants_as_string = ','.join(['%d' % d for d in self.descendants])
Expand All @@ -43,12 +54,12 @@ def __str__(self):




def allocatePartial(node, patterns, rates):
if node.number > 0:
npatterns = len(patterns)
# print 'npat', npatterns
node.partial = [0.0]*(4*4*npatterns)

if node.partial is None:
node.partial = [0.0]*(4*4*npatterns)
# print len(node.partial)
for i,pattern in enumerate(patterns.keys()):
base = pattern[node.number-1]
Expand All @@ -67,24 +78,18 @@ def allocatePartial(node, patterns, rates):
assert(False), 'oops, something went horribly wrong!'

else:
# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785]
# rt = [2.89442785]

# rt = [1.0, 1.0, 1.0, 1.0]


npatterns = len(patterns)
# print 'patterns=', patterns
node.partial = [0.0]*(4*4*npatterns)
if node.partial is None:
node.partial = [0.0]*(4*4*npatterns)

like_list = []
for i,pattern in enumerate(patterns.keys()):
# print i, pattern, patterns.keys()
m_list = []
num_pattern = patterns[pattern]
# print num_pattern

for l,m in enumerate(rates):


psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0))
pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0))

Expand Down Expand Up @@ -168,88 +173,83 @@ def replaedglen():

def mcmcbrn(postorder, patterns, rates):
nodes = readnewick(treenewick())

run = 1
mcmc = 0
output = os.path.join('brnlenMCMC_results.txt')
newf = open(output, 'w')
newf.write('%s\t'%('run'))

newf.write('%s\t'%('n_gen'))
newf.write( '%s\t%s\t'%('LnL','LnPr'))
for nl in postorder:
newf.write( 'node%s\t'%(nl.number))
newf.write('\n')
start_log_prior = 0.0
for nd in nodes:
start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo))
start_log_like = prepareTree(nodes, patterns, rates)


newf.write('%s\t'%(mcmc))
print 'mcmc gen=', mcmc,
print
print 'mcmc gen=', mcmc
print start_log_like, start_log_prior,

newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior))
for nl in postorder:
newf.write( '%.6f\t'%(nl.edgelen))
print nl.edgelen,
newf.write('\n')
print
for r in range(run):
b = 0.1
print
print '**************************'

# for i,nd in enumerate(postorder):
newf.write('\n')
# print
for r in range(n_gen):
for i in range(len(postorder)):

print 'mcmc gen=', mcmc+1,
print

# print 'current',
print 'current_node.edgelen=', nodes[i].number, nodes[i].edgelen
# print
preedgelen = nodes[i].edgelen
# print 'nodes[i]=', nodes[i].edgelen
currentlike = prepareTree(nodes, patterns, rates)
# currentlike = 0.0
currentprior = (-nodes[i].edgelen/b)-(log(b))
# newf.write('%s\t'%(currentprior))
currentprior = 0.0
for nd in nodes:
currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo))
current_ln_posterior = currentlike + currentprior


u = random.random()
m = exp(0.2*(u-0.5))
nodes[i].edgelen = preedgelen*m
print 'proposed_node.edgelen=',nodes[i].number, nodes[i].edgelen
# print 'new_edgelen=', nodes[i].edgelen
proposedprior = (-nodes[i].edgelen/b)-(log(b))
# newf.write('%s\t'%(proposedprior))

proposedtlike = prepareTree(nodes, patterns, rates)
# proposedtlike = 0.0
proposed_ln_posterior = proposedtlike + proposedprior
print
print 'current prior, currentlike=', currentprior, currentlike
print 'proposed prior, proposedlike=', proposedprior, proposedtlike
print
proposedprior = 0.0
for nd in nodes:
proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo))

proposedlike = prepareTree(nodes, patterns, rates)
proposed_ln_posterior = proposedlike + proposedprior
hastings_ratio = log(m)
print 'hastings_ratio', hastings_ratio
logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio
print 'logR=', logR
u2 = random.random()
print 'log-u2=', log(u2)
if log(u2) < logR:
nodes[i].edgelen = nodes[i].edgelen
print 'log(u2) < logR so new proposal accepted..'
log_prior = proposedprior
log_likelihood = proposedlike
# print 'log(u2) < logR so new proposal accepted..'
else:
nodes[i].edgelen = preedgelen
print 'failed accept new proposal..'
log_prior = currentprior
log_likelihood = currentlike

# print 'log(u2) > logR so failed to accept new proposal..'

if (r+1) % save_every == 0:
newf.write('%s\t'%(mcmc+1))
print
print 'proposed=',
print 'mcmc gen=', mcmc+1
print log_likelihood,log_prior,
newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior))
for j,k in enumerate(nodes):
newf.write( '%.6f\t'%(k.edgelen))
print k.edgelen,
newf.write('\n')
mcmc+=1

print
print '*********************************'
print '------------------------------------------------------------------------------'


print '**************************'

newf.flush()

mcmc+=1


def treenewick():
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
Expand Down Expand Up @@ -467,7 +467,7 @@ def calcExpectedHeight(num_species, mu_over_s):


if __name__ == '__main__':
random_seed = 346549 # 7632557, 12345
random_seed = 348889 # 7632557, 12345
number_of_species = 5
mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species
random.seed(random_seed)
Expand All @@ -484,9 +484,9 @@ def calcExpectedHeight(num_species, mu_over_s):
# print ' expected height =',expected_height
actual_height = calcActualHeight(species_tree_root)
# print ' actual height =',actual_height
print ' newick: ',newick[0]
print 'true tree: ',newick[0]
print '**************************'

alpha = 0.5 ### gamma shape parameter rate categories
# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)'
rates_list = gammaRates(alpha)
postorder = readnewick(treenewick())
Expand Down

0 comments on commit cfbd353

Please sign in to comment.