From 12f0bdf0f55ee1ba293377462d1de724b21a8434 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Tue, 18 Apr 2017 18:20:46 -0400 Subject: [PATCH] 1st likelihood on a tree script --- tree.py | 332 +++++++++++++++--------------------- tree_test.py | 466 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 603 insertions(+), 195 deletions(-) create mode 100644 tree_test.py diff --git a/tree.py b/tree.py index b3b29c8..f210c20 100644 --- a/tree.py +++ b/tree.py @@ -1,198 +1,140 @@ - -# git clone https://github.uconn.edu/sun13005/sntree.git . -# git status -# git add -# git commit -am "some message" -# git push -# git log - import random -import re -from itertools import chain -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) -# print descendants_as_string - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - - - -tree = '((4,((1,2),3)),5)' - -# tree = '((4:3.0,(6:7.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):1.0):7.0,5:2.0)' -# tree = '((4:3.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):7.0,5:2.0)' -# tree = '(((14:1.0,((1:1.0,(7:1.0,12:1.0):1.0):1.0,(9:1.0,15:1.0):1.0):1.0):1.0,4:1.0):1.0,((20:1.0,((((18:1.0,11:1.0):1.0,(3:1.0,(24:1.0,(19:1.0,56:1.0):1.0):1.0):1.0):1.0,13:1.0):1.0,((((2:1.0):1.0):1.0):1.0):1.0):1.0):1.0):1.0)' -# tree = '((2,(1,7)),((3,6),(4,5)))' - - -ext_br = re.findall('\\(\d+:(\d+\.\d+)' and '\d+:(\d+\.\d+)', tree) -int_br = re.findall('\\):(\d+\.\d+)', tree) - -int_br2 = map(float, int_br) -ext_br2 = map(float, ext_br) - -len_int_br = (len(int_br2)-1) -len_ext_br = (len(ext_br2)-1) - - - - -print 'int_nd.edgelen', int_br2 -print 'ext_nd.edgelen', ext_br2 - -total_length = len(tree) -internal_node_number = -1 - -root = node(internal_node_number) -nd = root -i = 0 -t= 0 -pre = [root] - -try: - - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - -# print 'internal_node_number=', internal_node_number - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child -# nd.edgelen = int_br2[len_int_br] - len_int_br -=1 - -# print 'nd=', nd.number -# print 'nd.edgelen', nd.edgelen - - print '*******************' - - elif m == ',': - internal_node_number -= 1 -# print 'internal_node_number=', internal_node_number - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - len_int_br +=1 -# nd.edgelen = int_br2[len_int_br] - len_int_br -=1 - - - -# print 'nd=', nd.number - print '...................' - elif m == ')': - nd = nd.par - print '+++++++++++++++++++' - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - print '<<<<<<<<<<<<<<<<<<<<' - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] -# nd.edgelen = ext_br2[t] - nd.number = int(mm) -# print 'ext_node', mm - i -= 1 - - print '^^^^^^^^^^^^^^^^^^^^^^^^' - - print 'nd.number=', nd.number - print 'nd.edgelen=', nd.edgelen - i += 1 - - print '#####################################################' - -except: - "out of range" -for nd in pre: - print nd - - - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number + self.edgelen = 0.0 # branch length + self.height = 0.0 # height above root + self.deep = False # True if this node resulted from a deep coalescence + self.descendants = set([ndnum]) # set containing descendant leaf set + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + +def joinSpecificPair(node_list, i, j, next_node_number, is_deep_coalescence): + assert j != i + + ndi = None + for k,nd in enumerate(node_list): + if nd.number == i: + ndi = node_list.pop(k) + assert ndi is not None + + ndj = None + for k,nd in enumerate(node_list): + if nd.number == j: + ndj = node_list.pop(k) + assert ndj is not None + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + +def getPreorder(nd, start = True): + global _preorder + if start: + _preorder = [] + + _preorder.append(nd) + + if nd.lchild: + getPreorder(nd.lchild, False) + + if nd.rsib: + getPreorder(nd.rsib, False) + + return _preorder + +def flipAllNodesAbove(root): + node_list = getPreorder(root) + for nd in node_list: if nd.lchild: - newickstring += '(' - print nd.number, nd.lchild.number - - elif nd.rsib: - print nd.number, nd.rsib.number - - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - - - - return newickstring -newick_string = Makenewick(pre) -print newick_string - - + lc = nd.lchild + rc = lc.rsib + nd.lchild = rc + assert rc.rsib is None + rc.rsib = lc + lc.rsib = None + +def makeNewick(nd, brlen_scaler = 1.0, start = True): + global _newick + global _TL + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.5f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + +def sumEdgeLengths(root): + sum_edge_lengths = 0.0 + pre = getPreorder(root) + for nd in pre: + sum_edge_lengths += nd.edgelen + return sum_edge_lengths + +def countDeepCoalescences(root): + n = 0 + preorder = getPreorder(root) + for nd in preorder: + if nd.deep: + n += 1 + return n diff --git a/tree_test.py b/tree_test.py new file mode 100644 index 0000000..77014c0 --- /dev/null +++ b/tree_test.py @@ -0,0 +1,466 @@ + +# git clone https://github.uconn.edu/sun13005/sntree.git . +# git status +# git add +# git commit -am "some message" +# git push +# git log +# git show fe580563:tree.py > tmp.py --> to extract older version +import readseq +import random +import re, os, itertools, sys, glob +from itertools import chain +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns +# self.probsame = 0.0 +# self.probdiff = 0.0 + + def allocatePartial(self, patterns): + # skip internals for now + print 'self.number, self.edgelen =',self.number, self.edgelen + + if self.number > 0: + # patterns['ACGTT'] = 51 + npatterns = len(patterns) + self.partial = [0.0]*(4*npatterns) + + print 'self.partial=', self.partial + + for i,pattern in enumerate(patterns.keys()): + + base = pattern[self.number-1] + print 'i = %d, pattern = %s, base = %s' % (i,pattern,base) + if base == 'A': + self.partial[i*4 + 0] = 1.0 + elif base == 'C': + self.partial[i*4 + 1] = 1.0 + elif base == 'G': + self.partial[i*4 + 2] = 1.0 + elif base == 'T': + self.partial[i*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + + print self.partial + print '****************************' + + else: + npatterns = len(patterns) + print 'npatterns=', npatterns + self.partial = [0.0]*(4*npatterns) + + for i,pattern in enumerate(patterns.keys()): +# print 'edge_length==', self.lchild.edgelen +# print 'test1=', self.lchild.partial[i*4 + 0], self.lchild.rsib.partial[i*4 + 0] +# print 'test2=', self.lchild.partial[i*4 + 1], self.lchild.rsib.partial[i*4 + 1] +# print 'test3=', self.lchild.partial[i*4 + 2], self.lchild.rsib.partial[i*4 + 2] +# print 'test4=', self.lchild.partial[i*4 + 3], self.lchild.rsib.partial[i*4 + 3] +# print'____________________' +# print 'test5=', self.lchild.edgelen +# print 'test6=', self.lchild.rsib.edgelen +# print'____________________' + + pAA = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) + pAC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) + pAG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) + pAT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) + + pAA2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) + pAC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) + pAG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) + pAT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + self.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild + +############################################################################################################################## + + pCA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) + pCC = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) + pCG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) + pCT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) + + pCA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) + pCC2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) + pCG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) + pCT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + self.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild +############################################################################################################################## +# + pGA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) + pGC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) + pGG = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) + pGT = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) +# + pGA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) + pGC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) + pGG2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) + pGT2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) +# + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + self.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild +############################################################################################################################## + pTA = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 0]) + pTC = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 1]) + pTG = (0.25-0.25*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 2]) + pTT = (0.25+0.75*exp(-4.0*(self.lchild.edgelen)/3.0))*(self.lchild.partial[i*4 + 3]) +# + pTA2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 0]) + pTC2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 1]) + pTG2 = (0.25-0.25*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 2]) + pTT2 = (0.25+0.75*exp(-4.0*(self.lchild.rsib.edgelen)/3.0))*(self.lchild.rsib.partial[i*4 + 3]) +# + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + self.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild +############################################################################################################################## + site1= (sum(self.partial[0:4]))*0.25 + site2= (sum(self.partial[4:8]))*0.25 + Like = log(site1)+log(site2) + + print self.partial + print 'site1=', site1 + print 'site2=', site2 + print 'Like=', Like + + print '****************************' + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) +# print descendants_as_string + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + + + +def prepareTree(postorder, patterns): + for nd in postorder: + + nd.allocatePartial(patterns) + + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + +# print + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + +def getPostorder(nd, start = True): # how to travel across tree + global _postorder + if start: + _postorder = [] # start with an empty list + + + if nd.lchild: + getPostorder(nd.lchild, False) # recursive function to + _postorder.append(nd) + + if nd.rsib: + getPostorder(nd.rsib, False) + + + + return _postorder + + +def readnewick(tree): +# global pre + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + # print 'internal_node_number=', internal_node_number + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + # nd.edgelen = int_br2[len_int_br] + # len_int_br -=1 + + # print 'nd=', nd.number + # print 'nd.edgelen', nd.edgelen + +# print '*******************' + elif m == ',': + internal_node_number -= 1 + # print 'internal_node_number=', internal_node_number + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + # len_int_br +=1 + + # nd.edgelen = int_br2[len_int_br] + # len_int_br -=1 + + # print 'nd=', nd.number +# print '...................' + elif m == ')': + nd = nd.par +# print '+++++++++++++++++++' + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + +# print '<<<<<<<<<<<<<<<<<<<<' + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + # nd.edgelen = ext_br2[t] + nd.number = int(mm) + # print 'ext_node', mm + i -= 1 + +# print '^^^^^^^^^^^^^^^^^^^^^^^^' + +# print 'nd.number=', nd.number +# print 'nd.edgelen=', nd.edgelen + i += 1 + +# print '############################ preorder ############################' +# for nd in pre: +# print nd.number + +# print '############################ postorder1 ############################' + post = pre[:] + post.reverse() +# for nd in post: +# print nd.number + +# print '############################ postorder2 ############################' +# for i in range(len(pre)): +# print pre[-i-1].number + + + return post +# print post +# y= getPostorder(root) + +# for nd in pre: +# print nd.number + + + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' +# print nd.number, nd.lchild.number + + elif nd.rsib: +# print nd.number, nd.rsib.number + + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + + + + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24223 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + + yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' + + postorder = readnewick(yuletree) + +# for nd in postorder: +# print nd.number + y = prepareTree(postorder, readseq.patterns()) + z = readnewick(yuletree) +# for nd in z: +# print nd.number, nd.edgelen +# for nd in postorder: +# nd.allocatePartial(patterns) +# +# prepareTree = prepareTree(readnewick, readseq.patterns()) +# print prepareTree + +###########################read sequences#########