brnlenMCMC.py

##########################################################################################
# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree
# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model,
# and performs MCMC on branch length parameter
##########################################################################################


import readSeq
import random
import re, os, itertools, sys, glob
from itertools import chain
from scipy.stats import  gamma
from math import exp, log


##########################################################################################
tree_file_name = 'tree.tre'
sequence_file = 'example3.nex'
alpha = 0.5 #gamma shape parameter for rate categories
n_gen = 50000
save_every = 50
mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior
##########################################################################################


class node(object):
    def __init__(self, ndnum):              # initialization function
        self.rsib = None                    # right sibling
        self.lchild = None                  # left child
        self.par = None                     # parent node
        self.number = ndnum                 # node number (internals negative, tips 0 or positive)
        self.edgelen = 0.0                  # branch length
        self.descendants = set([ndnum])     # set containing descendant leaf set
        self.partial = None                 # will have length 4*npatterns


    def __str__(self):
        # __str__ is a built-in function that is used by print to show an object
        descendants_as_string = ','.join(['%d' % d for d in self.descendants])

        lchildstr = 'None'
        if self.lchild is not None:
            lchildstr = '%d' % self.lchild.number

        rsibstr = 'None'
        if self.rsib is not None:
            rsibstr = '%d' % self.rsib.number

        parstr = 'None'
        if self.par is not None:
            parstr = '%d' % self.par.number

        return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string)


def allocatePartial(node, patterns, rates):
    if node.number > 0:
        npatterns = len(patterns)

        if node.partial is None:
            node.partial = [0.0]*(4*4*npatterns)
#             print len(node.partial)
        for i,pattern in enumerate(patterns.keys()):
            base = pattern[node.number-1]
            for l in range(4):
                if base == 'A':


                    node.partial[i*16+l*4 + 0] = 1.0
                elif base == 'C':
                    node.partial[i*16+l*4 + 1] = 1.0
                elif base == 'G':
                    node.partial[i*16+l*4 + 2] = 1.0
                elif base == 'T':
                    node.partial[i*16+l*4 + 3] = 1.0
                else:
                    assert(False), 'oops, something went horribly wrong!'

    else:

        npatterns = len(patterns)
        if node.partial is None:
            node.partial = [0.0]*(4*4*npatterns)

        like_list = []
        for i,pattern in enumerate(patterns.keys()):
            m_list = []
            num_pattern = patterns[pattern]

            for l,m in enumerate(rates):

                psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0))
                pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0))

                psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0))
                pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0))

                num_pattern = patterns[pattern]
                pAA = psame*(node.lchild.partial[i*16+l*4 + 0])
                pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1])
                pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2])
                pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3])

                pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0])
                pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1])
                pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2])
                pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3])

                pfromA_lchild = pAA+pAC+pAG+pAT
                pfromA_rchild = pAA2+pAC2+pAG2+pAT2
                node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild


                ######################################################

                pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0])
                pCC = psame*(node.lchild.partial[i*16+l*4 + 1])
                pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2])
                pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3])

                pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0])
                pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1])
                pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2])
                pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3])

                pfromC_lchild = pCA+pCC+pCG+pCT
                pfromC_rchild = pCA2+pCC2+pCG2+pCT2
                node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild

                #######################################################
#
                pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0])
                pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1])
                pGG = psame*(node.lchild.partial[i*16+l*4 + 2])
                pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3])

                pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0])
                pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1])
                pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2])
                pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3])

                pfromG_lchild = pGA+pGC+pGG+pGT
                pfromG_rchild = pGA2+pGC2+pGG2+pGT2
                node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild

                #######################################################

                pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0])
                pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1])
                pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2])
                pTT = psame*(node.lchild.partial[i*16+l*4 + 3])

                pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0])
                pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1])
                pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2])
                pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3])

                pfromT_lchild = pTA+pTC+pTG+pTT
                pfromT_rchild = pTA2+pTC2+pTG2+pTT2
                node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild

            site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25
            site_log_like = (log(site_like))*num_pattern
            like_list.append(site_log_like)
        log_like = sum(like_list)
        return log_like


def mcmcbrn(postorder, patterns, rates):
    nodes = readnewick(treenewick())
    mcmc = 0
    output = os.path.join('brnlenMCMC_results.txt')
    newf = open(output, 'w')
    newf.write('%s\t'%('n_gen'))
    newf.write( '%s\t%s\t'%('LnL','LnPr'))
    for nl in postorder:
        newf.write( 'node%s\t'%(nl.number))
    newf.write('\n')
    start_log_prior = 0.0
    for nd in nodes:
        start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo))
    start_log_like = prepareTree(nodes, patterns, rates)


    newf.write('%s\t'%(mcmc))
    print 'mcmc gen=', mcmc
    print start_log_like, start_log_prior,

    newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior))
    for nl in postorder:
        newf.write( '%.6f\t'%(nl.edgelen))
        print nl.edgelen,
    print
    print '**************************'

    newf.write('\n')
#     print
    for r in range(n_gen):
        for i in range(len(postorder)):
            preedgelen = nodes[i].edgelen
            currentlike = prepareTree(nodes, patterns, rates)
#             currentlike = 0.0
            currentprior = 0.0
            for nd in nodes:
                currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo))
            current_ln_posterior = currentlike + currentprior

            u = random.random()
            m = exp(0.2*(u-0.5))
            nodes[i].edgelen = preedgelen*m
            proposedprior = 0.0
            for nd in nodes:
                proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo))

            proposedlike = prepareTree(nodes, patterns, rates)
            proposed_ln_posterior = proposedlike + proposedprior
            hastings_ratio = log(m)
            logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio
            u2 = random.random()
            if log(u2) < logR:
                nodes[i].edgelen = nodes[i].edgelen
                log_prior = proposedprior
                log_likelihood = proposedlike
#                 print 'log(u2) < logR so new proposal accepted..'
            else:
                nodes[i].edgelen = preedgelen
                log_prior = currentprior
                log_likelihood = currentlike

#                 print 'log(u2) > logR so failed to accept new proposal..'

        if (r+1) % save_every == 0:
            newf.write('%s\t'%(mcmc+1))
            print 'mcmc gen=', mcmc+1
            print log_likelihood,log_prior,
            newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior))
            for j,k in enumerate(nodes):
                newf.write( '%.6f\t'%(k.edgelen))
                print k.edgelen,
            newf.write('\n')
            print
            print '**************************'

            newf.flush()

        mcmc+=1


def treenewick():
    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    path = os.path.join(script_dir, tree_file_name)
    with open(path, 'r') as content:
        newick = content.read()
    return newick
#

def gammaRates(alpha):
    bounds = [0.0, 0.25, 0.50, 0.75, 1.]
    rates = []
    for i in range(4):
#         print i
        lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha)
        upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha)
        mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4.
        rates.append(mean_rate)
    return rates

def prepareTree(postorder, patterns, rates):
    likelihood_lists = []
    for nd in postorder:
        likelihood_lists.append(allocatePartial(nd, patterns, rates))
#     print 'log-likelihood of the topology =', likelihood_lists[-1]
    return likelihood_lists[-1]

def joinRandomPair(node_list, next_node_number, is_deep_coalescence):
    # pick first of two lineages to join and delete from node_list
    i = random.randint(1, len(node_list))
    ndi = node_list[i-1]
    del node_list[i-1]

    # pick second of two lineages to join and delete from node_list
    j = random.randint(1, len(node_list))
    ndj = node_list[j-1]
    del node_list[j-1]

    # join selected nodes and add ancestor to node_list
    ancnd = node(next_node_number)
    ancnd.deep = is_deep_coalescence
    ancnd.lchild = ndi
    ancnd.descendants = set()
    ancnd.descendants |= ndi.descendants
    ancnd.descendants |= ndj.descendants
    ndi.rsib = ndj
    ndi.par = ancnd
    ndj.par = ancnd
    node_list.append(ancnd)

    return node_list


def makeNewick(nd, brlen_scaler = 1.0, start = True): #
    global _newick
    global _TL

    if start:
        _newick = ''
        _TL = 0.0

    if nd.lchild:
        _newick += '('
        makeNewick(nd.lchild, brlen_scaler, False)

    else:
        blen = nd.edgelen*brlen_scaler
        _TL += blen
        _newick += '%d:%.5f' % (nd.number, blen)

    if nd.rsib:
        _newick += ','
        makeNewick(nd.rsib, brlen_scaler, False)
    elif nd.par is not None:
        blen = nd.par.edgelen*brlen_scaler
        _TL += blen
        _newick += '):%.3f' % blen

    return _newick, _TL

def calcActualHeight(root):
    h = 0.0
    nd = root
    while nd.lchild:
        nd = nd.lchild
        h += nd.edgelen
    return h


def readnewick(tree):
    total_length = len(tree)
    internal_node_number = -1

    root = node(internal_node_number)
    nd = root
    i = 0
    pre = [root]
    while i < total_length:
        m = tree[i]

        if m =='(':
            internal_node_number -= 1

            child = node(internal_node_number)
            pre.append(child)
            nd.lchild=child

            child.par=nd
            nd=child
        elif m == ',':
            internal_node_number -= 1
            rsib = node(internal_node_number)
            pre.append(rsib)
            nd.rsib = rsib
            rsib.par=nd.par
            nd = rsib
        elif m == ')':
            nd = nd.par

        elif m == ':':
            edge_len_str = ''
            i+=1
            m = tree[i]
            assert m in ['0','1','2','3','4','5','6','7','8', '9','.']
            while m in ['0','1','2','3','4','5','6','7','8', '9','.']:
                edge_len_str += m
                i+=1
                m = tree[i]
            i -=1
            nd.edgelen = float(edge_len_str)


        else:
            internal_node_number += 1

            if True:
                assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m
                mm = ''
                while m in ['0','1','2','3','4','5','6','7','8', '9' ]:

                    mm += m

                    i += 1
                    m = tree[i]
                nd.number = int(mm)
                i -= 1

        i += 1

    post = pre[:]
    post.reverse()
    return post

def Makenewick(pre):
    newickstring = ''
    for i,nd in enumerate(pre):
        if nd.lchild:
            newickstring += '('

        elif nd.rsib:
            newickstring += '%d' %(nd.number)
            newickstring += ':%.1f' % nd.edgelen
            newickstring += ','

        else:
            newickstring += '%d' %(nd.number)
            newickstring += ':%.1f' % nd.edgelen
            tmpnd = nd
            while (tmpnd.par is not None) and (tmpnd.rsib is None):
                newickstring += ')'
                newickstring += ':%.1f' % tmpnd.par.edgelen
                tmpnd = tmpnd.par

            if tmpnd.par is not None:
                newickstring += ','
    return newickstring

###################yule tree###################################################
# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree
# - num_species is the number of leaves (tips) in the tree
def calcPhi(num_species):
    phi = sum([1.0/(K+2.0) for K in range(num_species-1)])
    return phi

# yuleTree creates a species tree in which edge lengths are measured in
# expected number of substitutions.
# - num_species is the number of leaves
# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio
def yuleTree(num_species, mu_over_s):
    # create num_species nodes numbered 1, 2, ..., num_species
    nodes = [node(i+1) for i in range(num_species)]

    next_node_number = num_species + 1
    while len(nodes) > 1:
        # choose a speciation time in generations
        K = float(len(nodes))
        mean_epoch_length = mu_over_s/K
        t = random.gammavariate(1.0, mean_epoch_length)

        # update each node's edgelen
        for n in nodes:
            n.edgelen += t      # same as: n.edgelen = n.edgelen + t

        nodes = joinRandomPair(nodes, next_node_number, False)
        next_node_number += 1

    return nodes[0]

# calcExpectedHeight returns the expected height of the species tree in terms of
# expected number of substitutions from the root to one tip.
# - num_species is the number of leaves
# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio
def calcExpectedHeight(num_species, mu_over_s):
    return mu_over_s*calcPhi(num_species)


if __name__ == '__main__':
    random_seed = 348889 # 7632557, 12345
    number_of_species = 5
    mutation_speciation_rate_ratio = 0.689655172 # 0.689655172  # yields tree height 1 for 6 species
    random.seed(random_seed)
    species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio)
#     print '#########'
#     print species_tree_root
    newick = makeNewick(species_tree_root)
#     print 'Random number seed: %d' % random_seed
#     print 'Simulating one tree:'
#     print '  number of species              = %d' % number_of_species
#     print '  mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio
#     print '  actual tree length             =',newick[1]
    expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio)
#     print '  expected height                =',expected_height
    actual_height = calcActualHeight(species_tree_root)
#     print '  actual height                  =',actual_height
    print 'true tree: ',newick[0]
    print '**************************'

#     yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)'
    rates_list = gammaRates(alpha)
    postorder = readnewick(treenewick())
    result =  prepareTree(postorder, readSeq.patterns(sequence_file), rates_list)
#     try1 = readSeq.patterns()
    result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list)
	##########################################################################################
	# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree
	# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model,
	# and performs MCMC on branch length parameter
	##########################################################################################


	import readSeq
	import random
	import re, os, itertools, sys, glob
	from itertools import chain
	from scipy.stats import gamma
	from math import exp, log


	##########################################################################################
	tree_file_name = 'tree.tre'
	sequence_file = 'example3.nex'
	alpha = 0.5 #gamma shape parameter for rate categories
	n_gen = 50000
	save_every = 50
	mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior
	##########################################################################################




	class node(object):
	def __init__(self, ndnum): # initialization function
	self.rsib = None # right sibling
	self.lchild = None # left child
	self.par = None # parent node
	self.number = ndnum # node number (internals negative, tips 0 or positive)
	self.edgelen = 0.0 # branch length
	self.descendants = set([ndnum]) # set containing descendant leaf set
	self.partial = None # will have length 4*npatterns


	def __str__(self):
	# __str__ is a built-in function that is used by print to show an object
	descendants_as_string = ','.join(['%d' % d for d in self.descendants])

	lchildstr = 'None'
	if self.lchild is not None:
	lchildstr = '%d' % self.lchild.number

	rsibstr = 'None'
	if self.rsib is not None:
	rsibstr = '%d' % self.rsib.number

	parstr = 'None'
	if self.par is not None:
	parstr = '%d' % self.par.number

	return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string)



	def allocatePartial(node, patterns, rates):
	if node.number > 0:
	npatterns = len(patterns)

	if node.partial is None:
	node.partial = [0.0](44*npatterns)
	# print len(node.partial)
	for i,pattern in enumerate(patterns.keys()):
	base = pattern[node.number-1]
	for l in range(4):
	if base == 'A':


	node.partial[i16+l4 + 0] = 1.0
	elif base == 'C':
	node.partial[i16+l4 + 1] = 1.0
	elif base == 'G':
	node.partial[i16+l4 + 2] = 1.0
	elif base == 'T':
	node.partial[i16+l4 + 3] = 1.0
	else:
	assert(False), 'oops, something went horribly wrong!'

	else:

	npatterns = len(patterns)
	if node.partial is None:
	node.partial = [0.0](44*npatterns)

	like_list = []
	for i,pattern in enumerate(patterns.keys()):
	m_list = []
	num_pattern = patterns[pattern]

	for l,m in enumerate(rates):

	psame = (0.25+0.75exp(-4.0m*(node.lchild.edgelen)/3.0))
	pdiff = (0.25-0.25exp(-4.0m*(node.lchild.edgelen)/3.0))

	psame2 = (0.25+0.75exp(-4.0m*(node.lchild.rsib.edgelen)/3.0))
	pdiff2 = (0.25-0.25exp(-4.0m*(node.lchild.rsib.edgelen)/3.0))

	num_pattern = patterns[pattern]
	pAA = psame(node.lchild.partial[i16+l*4 + 0])
	pAC = pdiff(node.lchild.partial[i16+l*4 + 1])
	pAG = pdiff(node.lchild.partial[i16+l*4 + 2])
	pAT = pdiff(node.lchild.partial[i16+l*4 + 3])

	pAA2 = psame2(node.lchild.rsib.partial[i16+l*4 + 0])
	pAC2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 1])
	pAG2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 2])
	pAT2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 3])

	pfromA_lchild = pAA+pAC+pAG+pAT
	pfromA_rchild = pAA2+pAC2+pAG2+pAT2
	node.partial[i16+l4 + 0] = pfromA_lchild*pfromA_rchild


	######################################################

	pCA = pdiff(node.lchild.partial[i16+l*4 + 0])
	pCC = psame(node.lchild.partial[i16+l*4 + 1])
	pCG = pdiff(node.lchild.partial[i16+l*4 + 2])
	pCT = pdiff(node.lchild.partial[i16+l*4 + 3])

	pCA2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 0])
	pCC2 = psame2(node.lchild.rsib.partial[i16+l*4 + 1])
	pCG2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 2])
	pCT2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 3])

	pfromC_lchild = pCA+pCC+pCG+pCT
	pfromC_rchild = pCA2+pCC2+pCG2+pCT2
	node.partial[i16+l4 + 1] = pfromC_lchild*pfromC_rchild

	#######################################################
	#
	pGA = pdiff(node.lchild.partial[i16+l*4 + 0])
	pGC = pdiff(node.lchild.partial[i16+l*4 + 1])
	pGG = psame(node.lchild.partial[i16+l*4 + 2])
	pGT = pdiff(node.lchild.partial[i16+l*4 + 3])

	pGA2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 0])
	pGC2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 1])
	pGG2 = psame2(node.lchild.rsib.partial[i16+l*4 + 2])
	pGT2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 3])

	pfromG_lchild = pGA+pGC+pGG+pGT
	pfromG_rchild = pGA2+pGC2+pGG2+pGT2
	node.partial[i16+l4 + 2] = pfromG_lchild*pfromG_rchild

	#######################################################

	pTA = pdiff(node.lchild.partial[i16+l*4 + 0])
	pTC = pdiff(node.lchild.partial[i16+l*4 + 1])
	pTG = pdiff(node.lchild.partial[i16+l*4 + 2])
	pTT = psame(node.lchild.partial[i16+l*4 + 3])

	pTA2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 0])
	pTC2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 1])
	pTG2 = pdiff2(node.lchild.rsib.partial[i16+l*4 + 2])
	pTT2 = psame2(node.lchild.rsib.partial[i16+l*4 + 3])

	pfromT_lchild = pTA+pTC+pTG+pTT
	pfromT_rchild = pTA2+pTC2+pTG2+pTT2
	node.partial[i16+l4 + 3] = pfromT_lchild*pfromT_rchild

	site_like = (sum(node.partial[i16:i16+16]))0.250.25
	site_log_like = (log(site_like))*num_pattern
	like_list.append(site_log_like)
	log_like = sum(like_list)
	return log_like



	def mcmcbrn(postorder, patterns, rates):
	nodes = readnewick(treenewick())
	mcmc = 0
	output = os.path.join('brnlenMCMC_results.txt')
	newf = open(output, 'w')
	newf.write('%s\t'%('n_gen'))
	newf.write( '%s\t%s\t'%('LnL','LnPr'))
	for nl in postorder:
	newf.write( 'node%s\t'%(nl.number))
	newf.write('\n')
	start_log_prior = 0.0
	for nd in nodes:
	start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo))
	start_log_like = prepareTree(nodes, patterns, rates)


	newf.write('%s\t'%(mcmc))
	print 'mcmc gen=', mcmc
	print start_log_like, start_log_prior,

	newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior))
	for nl in postorder:
	newf.write( '%.6f\t'%(nl.edgelen))
	print nl.edgelen,
	print
	print '**************************'

	newf.write('\n')
	# print
	for r in range(n_gen):
	for i in range(len(postorder)):
	preedgelen = nodes[i].edgelen
	currentlike = prepareTree(nodes, patterns, rates)
	# currentlike = 0.0
	currentprior = 0.0
	for nd in nodes:
	currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo))
	current_ln_posterior = currentlike + currentprior

	u = random.random()
	m = exp(0.2*(u-0.5))
	nodes[i].edgelen = preedgelen*m
	proposedprior = 0.0
	for nd in nodes:
	proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo))

	proposedlike = prepareTree(nodes, patterns, rates)
	proposed_ln_posterior = proposedlike + proposedprior
	hastings_ratio = log(m)
	logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio
	u2 = random.random()
	if log(u2) < logR:
	nodes[i].edgelen = nodes[i].edgelen
	log_prior = proposedprior
	log_likelihood = proposedlike
	# print 'log(u2) < logR so new proposal accepted..'
	else:
	nodes[i].edgelen = preedgelen
	log_prior = currentprior
	log_likelihood = currentlike

	# print 'log(u2) > logR so failed to accept new proposal..'

	if (r+1) % save_every == 0:
	newf.write('%s\t'%(mcmc+1))
	print 'mcmc gen=', mcmc+1
	print log_likelihood,log_prior,
	newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior))
	for j,k in enumerate(nodes):
	newf.write( '%.6f\t'%(k.edgelen))
	print k.edgelen,
	newf.write('\n')
	print
	print '**************************'

	newf.flush()

	mcmc+=1


	def treenewick():
	script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
	path = os.path.join(script_dir, tree_file_name)
	with open(path, 'r') as content:
	newick = content.read()
	return newick
	#

	def gammaRates(alpha):
	bounds = [0.0, 0.25, 0.50, 0.75, 1.]
	rates = []
	for i in range(4):
	# print i
	lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha)
	upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha)
	mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4.
	rates.append(mean_rate)
	return rates

	def prepareTree(postorder, patterns, rates):
	likelihood_lists = []
	for nd in postorder:
	likelihood_lists.append(allocatePartial(nd, patterns, rates))
	# print 'log-likelihood of the topology =', likelihood_lists[-1]
	return likelihood_lists[-1]

	def joinRandomPair(node_list, next_node_number, is_deep_coalescence):
	# pick first of two lineages to join and delete from node_list
	i = random.randint(1, len(node_list))
	ndi = node_list[i-1]
	del node_list[i-1]

	# pick second of two lineages to join and delete from node_list
	j = random.randint(1, len(node_list))
	ndj = node_list[j-1]
	del node_list[j-1]

	# join selected nodes and add ancestor to node_list
	ancnd = node(next_node_number)
	ancnd.deep = is_deep_coalescence
	ancnd.lchild = ndi
	ancnd.descendants = set()
	ancnd.descendants \|= ndi.descendants
	ancnd.descendants \|= ndj.descendants
	ndi.rsib = ndj
	ndi.par = ancnd
	ndj.par = ancnd
	node_list.append(ancnd)

	return node_list


	def makeNewick(nd, brlen_scaler = 1.0, start = True): #
	global _newick
	global _TL

	if start:
	_newick = ''
	_TL = 0.0

	if nd.lchild:
	_newick += '('
	makeNewick(nd.lchild, brlen_scaler, False)

	else:
	blen = nd.edgelen*brlen_scaler
	_TL += blen
	_newick += '%d:%.5f' % (nd.number, blen)

	if nd.rsib:
	_newick += ','
	makeNewick(nd.rsib, brlen_scaler, False)
	elif nd.par is not None:
	blen = nd.par.edgelen*brlen_scaler
	_TL += blen
	_newick += '):%.3f' % blen

	return _newick, _TL

	def calcActualHeight(root):
	h = 0.0
	nd = root
	while nd.lchild:
	nd = nd.lchild
	h += nd.edgelen
	return h


	def readnewick(tree):
	total_length = len(tree)
	internal_node_number = -1

	root = node(internal_node_number)
	nd = root
	i = 0
	pre = [root]
	while i < total_length:
	m = tree[i]

	if m =='(':
	internal_node_number -= 1

	child = node(internal_node_number)
	pre.append(child)
	nd.lchild=child

	child.par=nd
	nd=child
	elif m == ',':
	internal_node_number -= 1
	rsib = node(internal_node_number)
	pre.append(rsib)
	nd.rsib = rsib
	rsib.par=nd.par
	nd = rsib
	elif m == ')':
	nd = nd.par

	elif m == ':':
	edge_len_str = ''
	i+=1
	m = tree[i]
	assert m in ['0','1','2','3','4','5','6','7','8', '9','.']
	while m in ['0','1','2','3','4','5','6','7','8', '9','.']:
	edge_len_str += m
	i+=1
	m = tree[i]
	i -=1
	nd.edgelen = float(edge_len_str)


	else:
	internal_node_number += 1

	if True:
	assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m
	mm = ''
	while m in ['0','1','2','3','4','5','6','7','8', '9' ]:

	mm += m

	i += 1
	m = tree[i]
	nd.number = int(mm)
	i -= 1

	i += 1

	post = pre[:]
	post.reverse()
	return post

	def Makenewick(pre):
	newickstring = ''
	for i,nd in enumerate(pre):
	if nd.lchild:
	newickstring += '('

	elif nd.rsib:
	newickstring += '%d' %(nd.number)
	newickstring += ':%.1f' % nd.edgelen
	newickstring += ','

	else:
	newickstring += '%d' %(nd.number)
	newickstring += ':%.1f' % nd.edgelen
	tmpnd = nd
	while (tmpnd.par is not None) and (tmpnd.rsib is None):
	newickstring += ')'
	newickstring += ':%.1f' % tmpnd.par.edgelen
	tmpnd = tmpnd.par

	if tmpnd.par is not None:
	newickstring += ','
	return newickstring

	###################yule tree###################################################
	# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree
	# - num_species is the number of leaves (tips) in the tree
	def calcPhi(num_species):
	phi = sum([1.0/(K+2.0) for K in range(num_species-1)])
	return phi

	# yuleTree creates a species tree in which edge lengths are measured in
	# expected number of substitutions.
	# - num_species is the number of leaves
	# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio
	def yuleTree(num_species, mu_over_s):
	# create num_species nodes numbered 1, 2, ..., num_species
	nodes = [node(i+1) for i in range(num_species)]

	next_node_number = num_species + 1
	while len(nodes) > 1:
	# choose a speciation time in generations
	K = float(len(nodes))
	mean_epoch_length = mu_over_s/K
	t = random.gammavariate(1.0, mean_epoch_length)

	# update each node's edgelen
	for n in nodes:
	n.edgelen += t # same as: n.edgelen = n.edgelen + t

	nodes = joinRandomPair(nodes, next_node_number, False)
	next_node_number += 1

	return nodes[0]

	# calcExpectedHeight returns the expected height of the species tree in terms of
	# expected number of substitutions from the root to one tip.
	# - num_species is the number of leaves
	# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio
	def calcExpectedHeight(num_species, mu_over_s):
	return mu_over_s*calcPhi(num_species)


	if __name__ == '__main__':
	random_seed = 348889 # 7632557, 12345
	number_of_species = 5
	mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species
	random.seed(random_seed)
	species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio)
	# print '#########'
	# print species_tree_root
	newick = makeNewick(species_tree_root)
	# print 'Random number seed: %d' % random_seed
	# print 'Simulating one tree:'
	# print ' number of species = %d' % number_of_species
	# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio
	# print ' actual tree length =',newick[1]
	expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio)
	# print ' expected height =',expected_height
	actual_height = calcActualHeight(species_tree_root)
	# print ' actual height =',actual_height
	print 'true tree: ',newick[0]
	print '**************************'

	# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)'
	rates_list = gammaRates(alpha)
	postorder = readnewick(treenewick())
	result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list)
	# try1 = readSeq.patterns()
	result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list)