diff --git a/simSequences.py b/simSequences.py deleted file mode 100644 index 4181da1..0000000 --- a/simSequences.py +++ /dev/null @@ -1,174 +0,0 @@ -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from math import exp, log -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - self.state = None - self.states = None - - - def simulateSequences(self, num_sites): - self.states = [str]*(num_sites) - freq = [0.25, 0.25, 0.25, 0.25] - current_states = [ 'A', 'C', 'G', 'T'] - if self.par is None: - - for i in range(num_sites): - - ran_nm = random.random() - if ran_nm < freq[0]: -# self.state = 'A' - self.states[i] = 'A' - elif ran_nm <= freq[0]+freq[1]: -# self.state = 'C' - self.states[i] = 'C' - elif ran_nm <= freq[0]+freq[1]+freq[2]: -# self.state = 'G' - self.states[i] = 'G' - - else: -# self.state = 'T' - self.states[i] = 'T' - - else: - for m in range(num_sites): - prob = [] - ran_nm = random.random() - - for i in current_states: - if self.par.states[m] == i: - p = (0.25+0.75*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - else: - p = (0.25-0.25*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - for i in prob: - - if ran_nm <= prob[0]: -# self.state = 'A' - self.states[m] = 'A' - - elif ran_nm <= prob[0]+ prob[1]: -# self.state = 'C' - self.states[m] = 'C' - - elif ran_nm <= prob[0]+ prob[1]+ prob[2]: -# self.state = 'G' - self.states[m] = 'G' - - else: -# self.state = 'T' - self.states[m] = 'T' - - return self.states - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - -def simulate(preorder, ntax, num_sites, out): - newf = open(out, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write('dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write('format datatype=dna missing=? gap=-;\n') - newf.write('matrix\n') - master = {} - for nd in preorder: - master[nd.number]=nd.simulateSequences(num_sites) - if nd.number >0: - newf.write('%s %s\n' % (nd.number, ''.join(nd.simulateSequences(num_sites)))) - newf.write(';\n') - newf.write('end;') -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - i += 1 - - return pre - -if __name__ == '__main__': - - output_filename = os.path.join('simulated_output.nexus') - - yuletree = '(5:1.86010,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896)' - - preorder = readnewick(yuletree) - ntax = 5 - num_sites = 100 - result = simulate(preorder, ntax, num_sites, output_filename) -