From cfbd353a1afa8073d1ef7122b51463c2e9e6dea0 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Tue, 9 May 2017 15:04:11 -0400 Subject: [PATCH] fixed some issues --- brnlenMCMC.py | 136 +++++++++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/brnlenMCMC.py b/brnlenMCMC.py index e439ca3..4435021 100644 --- a/brnlenMCMC.py +++ b/brnlenMCMC.py @@ -1,7 +1,8 @@ -################################## +########################################################################################## # This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model -################################### +# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model, +# and performs MCMC on branch length parameter +########################################################################################## import readSeq @@ -10,6 +11,18 @@ from itertools import chain from scipy.stats import gamma from math import exp, log + + +########################################################################################## +alpha = 0.5 #gamma shape parameter for rate categories +n_gen = 50000 +save_every = 50 +mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior +########################################################################################## + + + + class node(object): def __init__(self, ndnum): # initialization function self.rsib = None # right sibling @@ -21,8 +34,6 @@ def __init__(self, ndnum): # initialization function self.partial = None # will have length 4*npatterns - - def __str__(self): # __str__ is a built-in function that is used by print to show an object descendants_as_string = ','.join(['%d' % d for d in self.descendants]) @@ -43,12 +54,12 @@ def __str__(self): - def allocatePartial(node, patterns, rates): if node.number > 0: npatterns = len(patterns) -# print 'npat', npatterns - node.partial = [0.0]*(4*4*npatterns) + + if node.partial is None: + node.partial = [0.0]*(4*4*npatterns) # print len(node.partial) for i,pattern in enumerate(patterns.keys()): base = pattern[node.number-1] @@ -67,24 +78,18 @@ def allocatePartial(node, patterns, rates): assert(False), 'oops, something went horribly wrong!' else: -# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] -# rt = [2.89442785] - -# rt = [1.0, 1.0, 1.0, 1.0] - + npatterns = len(patterns) -# print 'patterns=', patterns - node.partial = [0.0]*(4*4*npatterns) + if node.partial is None: + node.partial = [0.0]*(4*4*npatterns) + like_list = [] for i,pattern in enumerate(patterns.keys()): -# print i, pattern, patterns.keys() m_list = [] num_pattern = patterns[pattern] -# print num_pattern for l,m in enumerate(rates): - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) @@ -168,88 +173,83 @@ def replaedglen(): def mcmcbrn(postorder, patterns, rates): nodes = readnewick(treenewick()) - - run = 1 mcmc = 0 output = os.path.join('brnlenMCMC_results.txt') newf = open(output, 'w') - newf.write('%s\t'%('run')) - + newf.write('%s\t'%('n_gen')) + newf.write( '%s\t%s\t'%('LnL','LnPr')) for nl in postorder: newf.write( 'node%s\t'%(nl.number)) newf.write('\n') + start_log_prior = 0.0 + for nd in nodes: + start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo)) + start_log_like = prepareTree(nodes, patterns, rates) + + newf.write('%s\t'%(mcmc)) - print 'mcmc gen=', mcmc, - print + print 'mcmc gen=', mcmc + print start_log_like, start_log_prior, + + newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior)) for nl in postorder: newf.write( '%.6f\t'%(nl.edgelen)) print nl.edgelen, - newf.write('\n') - print - for r in range(run): - b = 0.1 + print + print '**************************' -# for i,nd in enumerate(postorder): + newf.write('\n') +# print + for r in range(n_gen): for i in range(len(postorder)): - - print 'mcmc gen=', mcmc+1, - print - -# print 'current', - print 'current_node.edgelen=', nodes[i].number, nodes[i].edgelen -# print preedgelen = nodes[i].edgelen -# print 'nodes[i]=', nodes[i].edgelen currentlike = prepareTree(nodes, patterns, rates) # currentlike = 0.0 - currentprior = (-nodes[i].edgelen/b)-(log(b)) -# newf.write('%s\t'%(currentprior)) + currentprior = 0.0 + for nd in nodes: + currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) current_ln_posterior = currentlike + currentprior - u = random.random() m = exp(0.2*(u-0.5)) nodes[i].edgelen = preedgelen*m - print 'proposed_node.edgelen=',nodes[i].number, nodes[i].edgelen -# print 'new_edgelen=', nodes[i].edgelen - proposedprior = (-nodes[i].edgelen/b)-(log(b)) -# newf.write('%s\t'%(proposedprior)) - - proposedtlike = prepareTree(nodes, patterns, rates) -# proposedtlike = 0.0 - proposed_ln_posterior = proposedtlike + proposedprior - print - print 'current prior, currentlike=', currentprior, currentlike - print 'proposed prior, proposedlike=', proposedprior, proposedtlike - print + proposedprior = 0.0 + for nd in nodes: + proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) + + proposedlike = prepareTree(nodes, patterns, rates) + proposed_ln_posterior = proposedlike + proposedprior hastings_ratio = log(m) - print 'hastings_ratio', hastings_ratio logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio - print 'logR=', logR u2 = random.random() - print 'log-u2=', log(u2) if log(u2) < logR: nodes[i].edgelen = nodes[i].edgelen - print 'log(u2) < logR so new proposal accepted..' + log_prior = proposedprior + log_likelihood = proposedlike +# print 'log(u2) < logR so new proposal accepted..' else: nodes[i].edgelen = preedgelen - print 'failed accept new proposal..' + log_prior = currentprior + log_likelihood = currentlike +# print 'log(u2) > logR so failed to accept new proposal..' + + if (r+1) % save_every == 0: newf.write('%s\t'%(mcmc+1)) - print - print 'proposed=', + print 'mcmc gen=', mcmc+1 + print log_likelihood,log_prior, + newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior)) for j,k in enumerate(nodes): newf.write( '%.6f\t'%(k.edgelen)) print k.edgelen, newf.write('\n') - mcmc+=1 - print - print '*********************************' - print '------------------------------------------------------------------------------' - - + print '**************************' + + newf.flush() + mcmc+=1 + def treenewick(): script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) @@ -467,7 +467,7 @@ def calcExpectedHeight(num_species, mu_over_s): if __name__ == '__main__': - random_seed = 346549 # 7632557, 12345 + random_seed = 348889 # 7632557, 12345 number_of_species = 5 mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species random.seed(random_seed) @@ -484,9 +484,9 @@ def calcExpectedHeight(num_species, mu_over_s): # print ' expected height =',expected_height actual_height = calcActualHeight(species_tree_root) # print ' actual height =',actual_height - print ' newick: ',newick[0] + print 'true tree: ',newick[0] + print '**************************' - alpha = 0.5 ### gamma shape parameter rate categories # yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' rates_list = gammaRates(alpha) postorder = readnewick(treenewick())