From a6626b4dc62de059a09c183d4273ec8d09a1ff8b Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Sat, 22 Apr 2017 19:14:46 -0400 Subject: [PATCH] final script to compute log-likilhood of a toplogy under Jukes Cantor model --- tree_like.py | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++ tree_test.py | 144 +++++++++----------- 2 files changed, 443 insertions(+), 78 deletions(-) create mode 100644 tree_like.py diff --git a/tree_like.py b/tree_like.py new file mode 100644 index 0000000..293f86b --- /dev/null +++ b/tree_like.py @@ -0,0 +1,377 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### + +import readseq +import random +import re, os, itertools, sys, glob +from itertools import chain +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + def allocatePartial(self, patterns): + if self.number > 0: + npatterns = len(patterns) + self.partial = [0.0]*(4*npatterns) + for i,pattern in enumerate(patterns.keys()): + base = pattern[self.number-1] + if base == 'A': + self.partial[i*4 + 0] = 1.0 + elif base == 'C': + self.partial[i*4 + 1] = 1.0 + elif base == 'G': + self.partial[i*4 + 2] = 1.0 + elif base == 'T': + self.partial[i*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: + npatterns = len(patterns) + self.partial = [0.0]*(4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): + psame = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + + pAA = psame*(self.lchild.partial[i*4 + 0]) + pAC = pdiff*(self.lchild.partial[i*4 + 1]) + pAG = pdiff*(self.lchild.partial[i*4 + 2]) + pAT = pdiff*(self.lchild.partial[i*4 + 3]) + + pAA2 = psame2*(self.lchild.rsib.partial[i*4 + 0]) + pAC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pAG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pAT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + self.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild + + ###################################################### + + pCA = pdiff*(self.lchild.partial[i*4 + 0]) + pCC = psame*(self.lchild.partial[i*4 + 1]) + pCG = pdiff*(self.lchild.partial[i*4 + 2]) + pCT = pdiff*(self.lchild.partial[i*4 + 3]) + + pCA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pCC2 = psame2*(self.lchild.rsib.partial[i*4 + 1]) + pCG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pCT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + self.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### +# + pGA = pdiff*(self.lchild.partial[i*4 + 0]) + pGC = pdiff*(self.lchild.partial[i*4 + 1]) + pGG = psame*(self.lchild.partial[i*4 + 2]) + pGT = pdiff*(self.lchild.partial[i*4 + 3]) +# + pGA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pGC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pGG2 = psame2*(self.lchild.rsib.partial[i*4 + 2]) + pGT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) +# + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + self.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(self.lchild.partial[i*4 + 0]) + pTC = pdiff*(self.lchild.partial[i*4 + 1]) + pTG = pdiff*(self.lchild.partial[i*4 + 2]) + pTT = psame*(self.lchild.partial[i*4 + 3]) +# + pTA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pTC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pTG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pTT2 = psame2*(self.lchild.rsib.partial[i*4 + 3]) +# + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + self.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild + + ######################################################### + + site_log_like = (log((sum(self.partial[i*4:i*4+4]))*0.25))*num_pattern + like_list.append(site_log_like) + + log_Like = sum(like_list) + return log_Like +# print 'log-like=', log_Like + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + + +def prepareTree(postorder, patterns): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(nd.allocatePartial(patterns)) + print 'log-likelihood of the topology =', likelihood_lists[-1] + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + +def getPostorder(nd, start = True): # how to travel across tree + global _postorder + if start: + _postorder = [] # start with an empty list + + + if nd.lchild: + getPostorder(nd.lchild, False) # recursive function to + _postorder.append(nd) + + if nd.rsib: + getPostorder(nd.rsib, False) + + + + return _postorder + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24553 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + + yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' + + postorder = readnewick(yuletree) + result = prepareTree(postorder, readseq.patterns()) diff --git a/tree_test.py b/tree_test.py index ea4d760..293f86b 100644 --- a/tree_test.py +++ b/tree_test.py @@ -1,11 +1,8 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### -# git clone https://github.uconn.edu/sun13005/sntree.git . -# git status -# git add -# git commit -am "some message" -# git push -# git log -# git show fe580563:tree.py > tmp.py --> to extract older version import readseq import random import re, os, itertools, sys, glob @@ -20,10 +17,8 @@ def __init__(self, ndnum): # initialization function self.edgelen = 0.0 # branch length self.descendants = set([ndnum]) # set containing descendant leaf set self.partial = None # will have length 4*npatterns - self.state = None def allocatePartial(self, patterns): - if self.number > 0: npatterns = len(patterns) self.partial = [0.0]*(4*npatterns) @@ -39,84 +34,94 @@ def allocatePartial(self, patterns): self.partial[i*4 + 3] = 1.0 else: assert(False), 'oops, something went horribly wrong!' - + else: npatterns = len(patterns) self.partial = [0.0]*(4*npatterns) like_list = [] for i,pattern in enumerate(patterns.keys()): + psame = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0)) + num_pattern = patterns[pattern] - pAA = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) - pAC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) - pAG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) - pAT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) + + pAA = psame*(self.lchild.partial[i*4 + 0]) + pAC = pdiff*(self.lchild.partial[i*4 + 1]) + pAG = pdiff*(self.lchild.partial[i*4 + 2]) + pAT = pdiff*(self.lchild.partial[i*4 + 3]) - pAA2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) - pAC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) - pAG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) - pAT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) + pAA2 = psame2*(self.lchild.rsib.partial[i*4 + 0]) + pAC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pAG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pAT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) pfromA_lchild = pAA+pAC+pAG+pAT pfromA_rchild = pAA2+pAC2+pAG2+pAT2 self.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild -############################################################################################################################## + ###################################################### - pCA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) - pCC = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) - pCG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) - pCT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) + pCA = pdiff*(self.lchild.partial[i*4 + 0]) + pCC = psame*(self.lchild.partial[i*4 + 1]) + pCG = pdiff*(self.lchild.partial[i*4 + 2]) + pCT = pdiff*(self.lchild.partial[i*4 + 3]) - pCA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) - pCC2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) - pCG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) - pCT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) + pCA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pCC2 = psame2*(self.lchild.rsib.partial[i*4 + 1]) + pCG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pCT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) pfromC_lchild = pCA+pCC+pCG+pCT pfromC_rchild = pCA2+pCC2+pCG2+pCT2 self.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild -############################################################################################################################## - - pGA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) - pGC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) - pGG = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) - pGT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) - - pGA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) - pGC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) - pGG2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) - pGT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) + ####################################################### +# + pGA = pdiff*(self.lchild.partial[i*4 + 0]) + pGC = pdiff*(self.lchild.partial[i*4 + 1]) + pGG = psame*(self.lchild.partial[i*4 + 2]) + pGT = pdiff*(self.lchild.partial[i*4 + 3]) +# + pGA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pGC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pGG2 = psame2*(self.lchild.rsib.partial[i*4 + 2]) + pGT2 = pdiff2*(self.lchild.rsib.partial[i*4 + 3]) +# pfromG_lchild = pGA+pGC+pGG+pGT pfromG_rchild = pGA2+pGC2+pGG2+pGT2 self.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild -############################################################################################################################## - pTA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) - pTC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) - pTG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) - pTT = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) - - pTA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) - pTC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) - pTG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) - pTT2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) - + + ####################################################### + + pTA = pdiff*(self.lchild.partial[i*4 + 0]) + pTC = pdiff*(self.lchild.partial[i*4 + 1]) + pTG = pdiff*(self.lchild.partial[i*4 + 2]) + pTT = psame*(self.lchild.partial[i*4 + 3]) +# + pTA2 = pdiff2*(self.lchild.rsib.partial[i*4 + 0]) + pTC2 = pdiff2*(self.lchild.rsib.partial[i*4 + 1]) + pTG2 = pdiff2*(self.lchild.rsib.partial[i*4 + 2]) + pTT2 = psame2*(self.lchild.rsib.partial[i*4 + 3]) +# pfromT_lchild = pTA+pTC+pTG+pTT pfromT_rchild = pTA2+pTC2+pTG2+pTT2 self.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild -############################################################################################################################## + + ######################################################### - site_like = (log((sum(self.partial[i*4:i*4+4]))*0.25))*num_pattern -# print 'site_like=', site_like - like_list.append(site_like) + site_log_like = (log((sum(self.partial[i*4:i*4+4]))*0.25))*num_pattern + like_list.append(site_log_like) - Like = sum(like_list) - print 'Like=', Like - + log_Like = sum(like_list) + return log_Like +# print 'log-like=', log_Like + def __str__(self): # __str__ is a built-in function that is used by print to show an object descendants_as_string = ','.join(['%d' % d for d in self.descendants]) -# print descendants_as_string lchildstr = 'None' if self.lchild is not None: @@ -133,11 +138,11 @@ def __str__(self): return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - def prepareTree(postorder, patterns): + likelihood_lists = [] for nd in postorder: - nd.allocatePartial(patterns) - + likelihood_lists.append(nd.allocatePartial(patterns)) + print 'log-likelihood of the topology =', likelihood_lists[-1] def joinRandomPair(node_list, next_node_number, is_deep_coalescence): # pick first of two lineages to join and delete from node_list @@ -219,7 +224,6 @@ def getPostorder(nd, start = True): # how to travel across tree def readnewick(tree): -# global pre total_length = len(tree) internal_node_number = -1 @@ -233,7 +237,6 @@ def readnewick(tree): if m =='(': internal_node_number -= 1 - # print 'internal_node_number=', internal_node_number child = node(internal_node_number) pre.append(child) nd.lchild=child @@ -242,7 +245,6 @@ def readnewick(tree): nd=child elif m == ',': internal_node_number -= 1 - # print 'internal_node_number=', internal_node_number rsib = node(internal_node_number) pre.append(rsib) nd.rsib = rsib @@ -290,20 +292,15 @@ def Makenewick(pre): for i,nd in enumerate(pre): if nd.lchild: newickstring += '(' -# print nd.number, nd.lchild.number elif nd.rsib: -# print nd.number, nd.rsib.number - newickstring += '%d' %(nd.number) newickstring += ':%.1f' % nd.edgelen - newickstring += ',' else: newickstring += '%d' %(nd.number) newickstring += ':%.1f' % nd.edgelen - tmpnd = nd while (tmpnd.par is not None) and (tmpnd.rsib is None): newickstring += ')' @@ -312,9 +309,6 @@ def Makenewick(pre): if tmpnd.par is not None: newickstring += ',' - - - return newickstring ###################yule tree################################################### @@ -380,10 +374,4 @@ def calcExpectedHeight(num_species, mu_over_s): yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' postorder = readnewick(yuletree) -# preorder = readnewick(yuletree) - -# for nd in postorder: -# print nd.number - y = prepareTree(postorder, readseq.patterns()) -# z = readnewick(yuletree) -# a = simulate(preorder, random1()) + result = prepareTree(postorder, readseq.patterns())