Skip to content

Commit

Permalink
1st likelihood on a tree script
Browse files Browse the repository at this point in the history
  • Loading branch information
sun13005 committed Apr 18, 2017
1 parent ef31ab8 commit 12f0bdf
Show file tree
Hide file tree
Showing 2 changed files with 603 additions and 195 deletions.
332 changes: 137 additions & 195 deletions tree.py
Original file line number Diff line number Diff line change
@@ -1,198 +1,140 @@

# git clone https://github.uconn.edu/sun13005/sntree.git .
# git status
# git add <file name>
# git commit -am "some message"
# git push
# git log

import random
import re
from itertools import chain
class node(object):
def __init__(self, ndnum): # initialization function
self.rsib = None # right sibling
self.lchild = None # left child
self.par = None # parent node
self.number = ndnum # node number (internals negative, tips 0 or positive)
self.edgelen = 0.0 # branch length
self.descendants = set([ndnum]) # set containing descendant leaf set

def __str__(self):
# __str__ is a built-in function that is used by print to show an object
descendants_as_string = ','.join(['%d' % d for d in self.descendants])
# print descendants_as_string

lchildstr = 'None'
if self.lchild is not None:
lchildstr = '%d' % self.lchild.number

rsibstr = 'None'
if self.rsib is not None:
rsibstr = '%d' % self.rsib.number

parstr = 'None'
if self.par is not None:
parstr = '%d' % self.par.number

return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string)



tree = '((4,((1,2),3)),5)'

# tree = '((4:3.0,(6:7.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):1.0):7.0,5:2.0)'
# tree = '((4:3.0,((1:1.0,2:1.0):5.0,3:6.0):4.0):7.0,5:2.0)'
# tree = '(((14:1.0,((1:1.0,(7:1.0,12:1.0):1.0):1.0,(9:1.0,15:1.0):1.0):1.0):1.0,4:1.0):1.0,((20:1.0,((((18:1.0,11:1.0):1.0,(3:1.0,(24:1.0,(19:1.0,56:1.0):1.0):1.0):1.0):1.0,13:1.0):1.0,((((2:1.0):1.0):1.0):1.0):1.0):1.0):1.0):1.0)'
# tree = '((2,(1,7)),((3,6),(4,5)))'


ext_br = re.findall('\\(\d+:(\d+\.\d+)' and '\d+:(\d+\.\d+)', tree)
int_br = re.findall('\\):(\d+\.\d+)', tree)

int_br2 = map(float, int_br)
ext_br2 = map(float, ext_br)

len_int_br = (len(int_br2)-1)
len_ext_br = (len(ext_br2)-1)




print 'int_nd.edgelen', int_br2
print 'ext_nd.edgelen', ext_br2

total_length = len(tree)
internal_node_number = -1

root = node(internal_node_number)
nd = root
i = 0
t= 0
pre = [root]

try:

while i < total_length:
m = tree[i]

if m =='(':
internal_node_number -= 1

# print 'internal_node_number=', internal_node_number
child = node(internal_node_number)
pre.append(child)
nd.lchild=child

child.par=nd
nd=child
# nd.edgelen = int_br2[len_int_br]
len_int_br -=1

# print 'nd=', nd.number
# print 'nd.edgelen', nd.edgelen

print '*******************'

elif m == ',':
internal_node_number -= 1
# print 'internal_node_number=', internal_node_number
rsib = node(internal_node_number)
pre.append(rsib)
nd.rsib = rsib
rsib.par=nd.par
nd = rsib
len_int_br +=1

# nd.edgelen = int_br2[len_int_br]
len_int_br -=1



# print 'nd=', nd.number
print '...................'
elif m == ')':
nd = nd.par
print '+++++++++++++++++++'

elif m == ':':
edge_len_str = ''
i+=1
m = tree[i]
assert m in ['0','1','2','3','4','5','6','7','8', '9','.']
while m in ['0','1','2','3','4','5','6','7','8', '9','.']:
edge_len_str += m
i+=1
m = tree[i]
i -=1
nd.edgelen = float(edge_len_str)

print '<<<<<<<<<<<<<<<<<<<<'

else:
internal_node_number += 1

if True:
assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m
mm = ''
while m in ['0','1','2','3','4','5','6','7','8', '9' ]:

mm += m

i += 1
m = tree[i]
# nd.edgelen = ext_br2[t]
nd.number = int(mm)
# print 'ext_node', mm
i -= 1

print '^^^^^^^^^^^^^^^^^^^^^^^^'

print 'nd.number=', nd.number
print 'nd.edgelen=', nd.edgelen
i += 1

print '#####################################################'

except:
"out of range"
for nd in pre:
print nd



def Makenewick(pre):
newickstring = ''
for i,nd in enumerate(pre):
class node(object):
def __init__(self, ndnum): # initialization function
self.rsib = None # right sibling
self.lchild = None # left child
self.par = None # parent node
self.number = ndnum # node number
self.edgelen = 0.0 # branch length
self.height = 0.0 # height above root
self.deep = False # True if this node resulted from a deep coalescence
self.descendants = set([ndnum]) # set containing descendant leaf set

def joinRandomPair(node_list, next_node_number, is_deep_coalescence):
# pick first of two lineages to join and delete from node_list
i = random.randint(1, len(node_list))
ndi = node_list[i-1]
del node_list[i-1]

# pick second of two lineages to join and delete from node_list
j = random.randint(1, len(node_list))
ndj = node_list[j-1]
del node_list[j-1]

# join selected nodes and add ancestor to node_list
ancnd = node(next_node_number)
ancnd.deep = is_deep_coalescence
ancnd.lchild = ndi
ancnd.descendants = set()
ancnd.descendants |= ndi.descendants
ancnd.descendants |= ndj.descendants
ndi.rsib = ndj
ndi.par = ancnd
ndj.par = ancnd
node_list.append(ancnd)

return node_list

def joinSpecificPair(node_list, i, j, next_node_number, is_deep_coalescence):
assert j != i

ndi = None
for k,nd in enumerate(node_list):
if nd.number == i:
ndi = node_list.pop(k)
assert ndi is not None

ndj = None
for k,nd in enumerate(node_list):
if nd.number == j:
ndj = node_list.pop(k)
assert ndj is not None

# join selected nodes and add ancestor to node_list
ancnd = node(next_node_number)
ancnd.deep = is_deep_coalescence
ancnd.lchild = ndi
ancnd.descendants = set()
ancnd.descendants |= ndi.descendants
ancnd.descendants |= ndj.descendants
ndi.rsib = ndj
ndi.par = ancnd
ndj.par = ancnd
node_list.append(ancnd)

return node_list

def getPreorder(nd, start = True):
global _preorder
if start:
_preorder = []

_preorder.append(nd)

if nd.lchild:
getPreorder(nd.lchild, False)

if nd.rsib:
getPreorder(nd.rsib, False)

return _preorder

def flipAllNodesAbove(root):
node_list = getPreorder(root)
for nd in node_list:
if nd.lchild:
newickstring += '('
print nd.number, nd.lchild.number

elif nd.rsib:
print nd.number, nd.rsib.number

newickstring += '%d' %(nd.number)
newickstring += ':%.1f' % nd.edgelen

newickstring += ','

else:
newickstring += '%d' %(nd.number)
newickstring += ':%.1f' % nd.edgelen

tmpnd = nd
while (tmpnd.par is not None) and (tmpnd.rsib is None):
newickstring += ')'
newickstring += ':%.1f' % tmpnd.par.edgelen
tmpnd = tmpnd.par

if tmpnd.par is not None:
newickstring += ','



return newickstring
newick_string = Makenewick(pre)
print newick_string


lc = nd.lchild
rc = lc.rsib
nd.lchild = rc
assert rc.rsib is None
rc.rsib = lc
lc.rsib = None

def makeNewick(nd, brlen_scaler = 1.0, start = True):
global _newick
global _TL
if start:
_newick = ''
_TL = 0.0

if nd.lchild:
_newick += '('
makeNewick(nd.lchild, brlen_scaler, False)
else:
blen = nd.edgelen*brlen_scaler
_TL += blen
_newick += '%d:%.5f' % (nd.number, blen)

if nd.rsib:
_newick += ','
makeNewick(nd.rsib, brlen_scaler, False)
elif nd.par is not None:
blen = nd.par.edgelen*brlen_scaler
_TL += blen
_newick += '):%.5f' % blen

return _newick, _TL

def calcActualHeight(root):
h = 0.0
nd = root
while nd.lchild:
nd = nd.lchild
h += nd.edgelen
return h

def sumEdgeLengths(root):
sum_edge_lengths = 0.0
pre = getPreorder(root)
for nd in pre:
sum_edge_lengths += nd.edgelen
return sum_edge_lengths

def countDeepCoalescences(root):
n = 0
preorder = getPreorder(root)
for nd in preorder:
if nd.deep:
n += 1
return n
Loading

0 comments on commit 12f0bdf

Please sign in to comment.