# Parse a Newick tree description into linked `node` objects (left-child /
# right-sibling representation) and write it back out as a Newick string.
#
# The print calls below pass a single pre-formatted string so that the output
# is the same whether this file is run with Python 2 or Python 3.

import random
import re
from itertools import chain


class node(object):
    """One vertex of a tree stored in left-child / right-sibling form."""

    def __init__(self, ndnum):                      # initialization function
        self.rsib = None                            # right sibling
        self.lchild = None                          # left child
        self.par = None                             # parent node
        self.number = ndnum                         # node number (internals negative, tips 0 or positive)
        self.edgelen = 0.0                          # branch length
        # NOTE(review): this set is seeded with the number given at
        # construction and is not touched when the parser later overwrites
        # `number` with a tip label -- confirm whether that is intended.
        self.descendants = set([ndnum])             # set containing descendant leaf set

    def __str__(self):
        # __str__ is the hook used by print() / str() to show an object
        descendants_as_string = ','.join(['%d' % d for d in self.descendants])

        lchildstr = 'None'
        if self.lchild is not None:
            lchildstr = '%d' % self.lchild.number

        rsibstr = 'None'
        if self.rsib is not None:
            rsibstr = '%d' % self.rsib.number

        parstr = 'None'
        if self.par is not None:
            parstr = '%d' % self.par.number

        return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (
            self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string)


_DIGITS = '0123456789'
_EDGELEN_CHARS = _DIGITS + '.'


def _scan_run(text, start, allowed):
    """Return (run, end): the longest run of `allowed` characters in `text`
    beginning at index `start`, and the index just past that run.

    The scan stops at the end of `text` instead of indexing beyond it.
    """
    end = start
    limit = len(text)
    while end < limit and text[end] in allowed:
        end += 1
    return text[start:end], end


def parse_newick(text, trace=False):
    """Build a tree of `node` objects from the Newick description `text`.

    Recognised tokens are '(' (open a group: create the first child of the
    current node), ',' (create a right sibling of the current node), ')'
    (return to the parent), ':<digits and dots>' (edge length of the current
    node) and a run of digits (tip number of the current node).  Whitespace
    between tokens is skipped and a ';' ends the description.

    Parameters:
        text  -- the Newick string, e.g. '((4,((1,2),3)),5)'
        trace -- when true, print the per-character debug markers and the
                 current node's number and edge length

    Returns:
        A list of the nodes in the order they were created (pre-order); the
        first element is the root.  Internal nodes are numbered -1, -2, ...
        and tips carry the number read from `text`.

    Raises:
        ValueError -- on a ')' with no matching '(', a ':' that is not
                      followed by a number, or any unexpected character.
    """
    internal_node_number = -1
    root = node(internal_node_number)
    nd = root
    pre = [root]

    i = 0
    total_length = len(text)
    while i < total_length:
        m = text[i]
        next_i = i + 1

        if m == ';':
            # end-of-tree terminator: nothing after it belongs to the tree
            break

        if m.isspace():
            i = next_i
            continue

        if m == '(':
            internal_node_number -= 1
            child = node(internal_node_number)
            pre.append(child)
            nd.lchild = child
            child.par = nd
            nd = child
            marker = '*******************'

        elif m == ',':
            internal_node_number -= 1
            rsib = node(internal_node_number)
            pre.append(rsib)
            nd.rsib = rsib
            rsib.par = nd.par
            nd = rsib
            marker = '...................'

        elif m == ')':
            if nd.par is None:
                raise ValueError('unbalanced ")" at position %d' % i)
            nd = nd.par
            marker = '+++++++++++++++++++'

        elif m == ':':
            edge_len_str, next_i = _scan_run(text, i + 1, _EDGELEN_CHARS)
            if not edge_len_str:
                raise ValueError('expecting an edge length after ":" at position %d' % i)
            nd.edgelen = float(edge_len_str)
            marker = '<<<<<<<<<<<<<<<<<<<<'

        elif m in _DIGITS:
            # The node was created with a provisional internal number by the
            # preceding '(' or ','; give that number back and label the tip.
            internal_node_number += 1
            mm, next_i = _scan_run(text, i, _DIGITS)
            nd.number = int(mm)
            marker = '^^^^^^^^^^^^^^^^^^^^^^^^'

        else:
            raise ValueError('Error : expecting m to be a digit when in fact it was "%s"' % m)

        if trace:
            print(marker)
            print('nd.number= %s' % nd.number)
            print('nd.edgelen= %s' % nd.edgelen)
            print('#####################################################')

        i = next_i

    return pre


def Makenewick(pre):
    """Return the Newick string for the tree whose nodes are listed in `pre`.

    `pre` must hold the nodes in pre-order, as produced by parse_newick.
    Every tip is written as 'number:edgelen' and every closed group is
    followed by ':edgelen' of the node that owns the group, the root
    included.  Edge lengths are written with one decimal place.  The node
    numbers visited are also printed as a debugging aid.
    """
    pieces = []
    for nd in pre:
        if nd.lchild is not None:
            pieces.append('(')
            print('%s %s' % (nd.number, nd.lchild.number))

        elif nd.rsib is not None:
            print('%s %s' % (nd.number, nd.rsib.number))
            pieces.append('%d' % (nd.number))
            pieces.append(':%.1f' % nd.edgelen)
            pieces.append(',')

        else:
            pieces.append('%d' % (nd.number))
            pieces.append(':%.1f' % nd.edgelen)

            # This tip is the last child of its parent: close every group
            # that ends here by climbing until a node with a right sibling
            # (or the root) is reached.
            tmpnd = nd
            while (tmpnd.par is not None) and (tmpnd.rsib is None):
                pieces.append(')')
                pieces.append(':%.1f' % tmpnd.par.edgelen)
                tmpnd = tmpnd.par

            if tmpnd.par is not None:
                pieces.append(',')

    return ''.join(pieces)


tree = '((4,((1,2),3)),5)'

# Alternative inputs:
# tree = '((4:3.0,(6:7.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):1.0):7.0,5:2.0)'
# tree = '((4:3.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):7.0,5:2.0)'
# tree = '(((14:1.0,((1:1.0,(7:1.0,12:1.0):1.0):1.0,(9:1.0,15:1.0):1.0):1.0):1.0,4:1.0):1.0,((20:1.0,((((18:1.0,11:1.0):1.0,(3:1.0,(24:1.0,(19:1.0,56:1.0):1.0):1.0):1.0):1.0,13:1.0):1.0,((((2:1.0):1.0):1.0):1.0):1.0):1.0):1.0):1.0)'
# tree = '((2,(1,7)),((3,6),(4,5)))'

# Edge lengths pulled straight from the text: those following a tip number
# and those following a closing parenthesis.
ext_br = re.findall(r'\d+:(\d+\.\d+)', tree)
int_br = re.findall(r'\):(\d+\.\d+)', tree)

int_br2 = [float(length) for length in int_br]
ext_br2 = [float(length) for length in ext_br]

len_int_br = (len(int_br2) - 1)
len_ext_br = (len(ext_br2) - 1)

print('int_nd.edgelen %s' % (int_br2,))
print('ext_nd.edgelen %s' % (ext_br2,))

total_length = len(tree)

pre = parse_newick(tree, trace=True)
root = pre[0]

for nd in pre:
    print(nd)

newick_string = Makenewick(pre)
print(newick_string)