From becd19bfe07a5f8f8550effb514aaa42d73fb52f Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:51:35 -0400 Subject: [PATCH 01/11] all old brnlenmcmc --- brnlenMCMC/brnlenMCMC.py | 494 +++++++++++++++++++++++++++++++++++++++ brnlenMCMC/readSeq.py | 69 ++++++ brnlenMCMC/readtree.py | 22 ++ brnlenMCMC/tree.tre | 1 + 4 files changed, 586 insertions(+) create mode 100644 brnlenMCMC/brnlenMCMC.py create mode 100644 brnlenMCMC/readSeq.py create mode 100644 brnlenMCMC/readtree.py create mode 100644 brnlenMCMC/tree.tre diff --git a/brnlenMCMC/brnlenMCMC.py b/brnlenMCMC/brnlenMCMC.py new file mode 100644 index 0000000..b63467a --- /dev/null +++ b/brnlenMCMC/brnlenMCMC.py @@ -0,0 +1,494 @@ +########################################################################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model, +# and performs MCMC on branch length parameter +########################################################################################## + + +import readSeq +import random +import re, os, itertools, sys, glob +from itertools import chain +from scipy.stats import gamma +from math import exp, log + + +########################################################################################## +tree_file_name = 'tree.tre' +sequence_file = 'example3.nex' +alpha = 0.5 #gamma shape parameter for rate categories +n_gen = 4 +save_every = 1 +mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior +########################################################################################## + + + + +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + + + +def allocatePartial(node, patterns, rates): + if node.number > 0: + npatterns = len(patterns) + + if node.partial is None: + node.partial = [0.0]*(4*4*npatterns) +# print len(node.partial) + for i,pattern in enumerate(patterns.keys()): + base = pattern[node.number-1] + for l in range(4): + if base == 'A': + + + node.partial[i*16+l*4 + 0] = 1.0 + elif base == 'C': + node.partial[i*16+l*4 + 1] = 1.0 + elif base == 'G': + node.partial[i*16+l*4 + 2] = 1.0 + elif base == 'T': + node.partial[i*16+l*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: + + npatterns = len(patterns) + if node.partial is None: + node.partial = [0.0]*(4*4*npatterns) + + like_list = [] + for i,pattern in enumerate(patterns.keys()): + m_list = [] + num_pattern = patterns[pattern] + for l,m in enumerate(rates): + + psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) + pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) + pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) + pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) + + pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) + pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) + pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) + pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild + + + ###################################################### + + pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) + pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) + pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) + pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) + + pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) + pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) + pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) + pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### +# + pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) + pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) + pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) + pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) + + pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) + pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) + pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) + pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) + pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) + pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) + pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) + + pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) + pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) + pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) + pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild + site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 + site_log_like = (log(site_like))*num_pattern + like_list.append(site_log_like) + log_like = sum(like_list) + return log_like + + + +def mcmcbrn(postorder, patterns, rates): + nodes = postorder +# nodes = readnewick(treenewick()) + + mcmc = 0 + output = os.path.join('brnlenMCMC_results.txt') + newf = open(output, 'w') + newf.write('%s\t'%('n_gen')) + newf.write( '%s\t%s\t'%('LnL','LnPr')) + for nl in postorder: + newf.write( 'node%s\t'%(nl.number)) + newf.write('\n') + start_log_prior = 0.0 + for nd in nodes: + start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo)) + start_log_like = prepareTree(nodes, patterns, rates) + + + newf.write('%s\t'%(mcmc)) + print 'mcmc gen=', mcmc + print start_log_like, start_log_prior, + + newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior)) + for nl in postorder: + newf.write( '%.6f\t'%(nl.edgelen)) + print nl.edgelen, + print + print '**************************' + + newf.write('\n') +# print + for r in range(n_gen): + for i in range(len(postorder)): + preedgelen = nodes[i].edgelen + currentlike = prepareTree(nodes, patterns, rates) +# currentlike = 0.0 + currentprior = 0.0 + for nd in nodes: + currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) + current_ln_posterior = currentlike + currentprior + + u = random.random() + m = exp(0.2*(u-0.5)) + nodes[i].edgelen = preedgelen*m + proposedprior = 0.0 + for nd in nodes: + proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) + + proposedlike = prepareTree(nodes, patterns, rates) + proposed_ln_posterior = proposedlike + proposedprior + hastings_ratio = log(m) + logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio + u2 = random.random() + if log(u2) < logR: + nodes[i].edgelen = nodes[i].edgelen + log_prior = proposedprior + log_likelihood = proposedlike +# print 'log(u2) < logR so new proposal accepted..' + else: + nodes[i].edgelen = preedgelen + log_prior = currentprior + log_likelihood = currentlike + +# print 'log(u2) > logR so failed to accept new proposal..' + + if (r+1) % save_every == 0: + newf.write('%s\t'%(mcmc+1)) + print 'mcmc gen=', mcmc+1 + print log_likelihood,log_prior, + newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior)) + for j,k in enumerate(nodes): + newf.write( '%.6f\t'%(k.edgelen)) + print k.edgelen, + newf.write('\n') + print + print '**************************' + + newf.flush() + + mcmc+=1 + + +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + path = os.path.join(script_dir, tree_file_name) + with open(path, 'r') as content: + newick = content.read() + return newick +# + +def gammaRates(alpha): + bounds = [0.0, 0.25, 0.50, 0.75, 1.] + rates = [] + for i in range(4): +# print i + lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) + upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) + mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. + rates.append(mean_rate) + return rates + +def prepareTree(postorder, patterns, rates): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(allocatePartial(nd, patterns, rates)) +# print 'log-likelihood of the topology =', likelihood_lists[-1] + return likelihood_lists[-1] + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 348889 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height + print 'true tree: ',newick[0] + print '**************************' + +# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' + rates_list = gammaRates(alpha) + postorder = readnewick(treenewick()) + result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) +# try1 = readSeq.patterns() + result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list) diff --git a/brnlenMCMC/readSeq.py b/brnlenMCMC/readSeq.py new file mode 100644 index 0000000..91c58a5 --- /dev/null +++ b/brnlenMCMC/readSeq.py @@ -0,0 +1,69 @@ +def patterns(sequence_file): + # + import re, os, glob, itertools, fnmatch, sys, shutil + from itertools import combinations + from collections import Counter + + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# path = os.path.join(script_dir, 'nexus') + + genes = [] + data = {} +# print 'Reading nexus files...' + for filename in glob.glob(os.path.join(script_dir, sequence_file)): + + m = re.match('(.+).nex', os.path.basename(filename)) + gene_name = m.group(1) +# print 'gene_name=', gene_name + genes.append(gene_name) + f = open(filename, 'r').read() + + m = re.search('ntax\s*=\s*(\d+)', f, re.M | re.S) + ntax = int(m.group(1)) +# print 'ntax=', ntax + + m = re.search('nchar\s*=\s*(\d+)', f, re.M | re.S) + nchar = int(m.group(1)) +# print 'nchar=', nchar + + m = re.search('Matrix\s+(.+?);', f, re.M | re.S) + matrix = m.group(1).strip() + matrix_lines = matrix.split('\n') + + taxon_names = [] + sequences = {} + sequences_list = [] + for line in matrix_lines: + parts = line.strip().split() + assert len(parts) == 2 + taxon_name = parts[0] + sequence = parts[1] + + taxon_names.append(taxon_name) + sequences_list.append(sequence) + sequences[taxon_name] = sequence + + pattern_list = [] + + k=0 + while k < nchar: + site_pattern = '' + for i,m in enumerate(sequences_list): + site_pattern += m[k] + pattern_list.append(site_pattern) + k+=1 + pattern_dict = dict() + for i in pattern_list: + pattern_dict[i] = pattern_dict.get(i, 0) + 1 + + tmp = [] + for key in pattern_dict.keys(): ###convert dict to key of tupules +# print 'key=', key + tmp.append((pattern_dict[key],key)) + + sorted_values = sorted(tmp) ###sorted according to key smaller to larger + sorted_values.sort(cmp = lambda x,y:cmp(x[1],y[1])) ###sorted according to values in alphabetical order + + return pattern_dict + + diff --git a/brnlenMCMC/readtree.py b/brnlenMCMC/readtree.py new file mode 100644 index 0000000..a7b4943 --- /dev/null +++ b/brnlenMCMC/readtree.py @@ -0,0 +1,22 @@ +# def treenewck(): +# script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# path = os.path.join(script_dir, 'nexus') +# for filename in glob.glob(os.path.join(path, '*.tre*')): +# f = open(filename, 'r').read() + +import re, os, glob, itertools, fnmatch, sys, shutil +# dirname, filename = os.path.split(os.path.abspath(__file__)) +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# print script_dir + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick +a = treenewick() +print a + + +# dirname, filename = os.path.split(os.path.abspath(__file__)) +# print "running from", dirname +# print "file is", filename \ No newline at end of file diff --git a/brnlenMCMC/tree.tre b/brnlenMCMC/tree.tre new file mode 100644 index 0000000..2d84738 --- /dev/null +++ b/brnlenMCMC/tree.tre @@ -0,0 +1 @@ +(5:1.8601,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896) \ No newline at end of file From 938c6fd892e3dc572ac91283f58c65b1a40e565b Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:53:23 -0400 Subject: [PATCH 02/11] Delete brnlenMCMC.py --- brnlenMCMC.py | 494 -------------------------------------------------- 1 file changed, 494 deletions(-) delete mode 100644 brnlenMCMC.py diff --git a/brnlenMCMC.py b/brnlenMCMC.py deleted file mode 100644 index 87c6160..0000000 --- a/brnlenMCMC.py +++ /dev/null @@ -1,494 +0,0 @@ -########################################################################################## -# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model, -# and performs MCMC on branch length parameter -########################################################################################## - - -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from scipy.stats import gamma -from math import exp, log - - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -alpha = 0.5 #gamma shape parameter for rate categories -n_gen = 50000 -save_every = 50 -mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior -########################################################################################## - - - - -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - - - -def allocatePartial(node, patterns, rates): - if node.number > 0: - npatterns = len(patterns) - - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) -# print len(node.partial) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - for l in range(4): - if base == 'A': - - - node.partial[i*16+l*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*16+l*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*16+l*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*16+l*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: - - npatterns = len(patterns) - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) - - like_list = [] - for i,pattern in enumerate(patterns.keys()): - m_list = [] - num_pattern = patterns[pattern] - - for l,m in enumerate(rates): - - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild - - - ###################################################### - - pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) - - pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 - site_log_like = (log(site_like))*num_pattern - like_list.append(site_log_like) - log_like = sum(like_list) - return log_like - - - -def mcmcbrn(postorder, patterns, rates): - nodes = readnewick(treenewick()) - mcmc = 0 - output = os.path.join('brnlenMCMC_results.txt') - newf = open(output, 'w') - newf.write('%s\t'%('n_gen')) - newf.write( '%s\t%s\t'%('LnL','LnPr')) - for nl in postorder: - newf.write( 'node%s\t'%(nl.number)) - newf.write('\n') - start_log_prior = 0.0 - for nd in nodes: - start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - start_log_like = prepareTree(nodes, patterns, rates) - - - newf.write('%s\t'%(mcmc)) - print 'mcmc gen=', mcmc - print start_log_like, start_log_prior, - - newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior)) - for nl in postorder: - newf.write( '%.6f\t'%(nl.edgelen)) - print nl.edgelen, - print - print '**************************' - - newf.write('\n') -# print - for r in range(n_gen): - for i in range(len(postorder)): - preedgelen = nodes[i].edgelen - currentlike = prepareTree(nodes, patterns, rates) -# currentlike = 0.0 - currentprior = 0.0 - for nd in nodes: - currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - current_ln_posterior = currentlike + currentprior - - u = random.random() - m = exp(0.2*(u-0.5)) - nodes[i].edgelen = preedgelen*m - proposedprior = 0.0 - for nd in nodes: - proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - - proposedlike = prepareTree(nodes, patterns, rates) - proposed_ln_posterior = proposedlike + proposedprior - hastings_ratio = log(m) - logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio - u2 = random.random() - if log(u2) < logR: - nodes[i].edgelen = nodes[i].edgelen - log_prior = proposedprior - log_likelihood = proposedlike -# print 'log(u2) < logR so new proposal accepted..' - else: - nodes[i].edgelen = preedgelen - log_prior = currentprior - log_likelihood = currentlike - -# print 'log(u2) > logR so failed to accept new proposal..' - - if (r+1) % save_every == 0: - newf.write('%s\t'%(mcmc+1)) - print 'mcmc gen=', mcmc+1 - print log_likelihood,log_prior, - newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior)) - for j,k in enumerate(nodes): - newf.write( '%.6f\t'%(k.edgelen)) - print k.edgelen, - newf.write('\n') - print - print '**************************' - - newf.flush() - - mcmc+=1 - - -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) - with open(path, 'r') as content: - newick = content.read() - return newick -# - -def gammaRates(alpha): - bounds = [0.0, 0.25, 0.50, 0.75, 1.] - rates = [] - for i in range(4): -# print i - lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) - upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) - mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. - rates.append(mean_rate) - return rates - -def prepareTree(postorder, patterns, rates): - likelihood_lists = [] - for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns, rates)) -# print 'log-likelihood of the topology =', likelihood_lists[-1] - return likelihood_lists[-1] - -def joinRandomPair(node_list, next_node_number, is_deep_coalescence): - # pick first of two lineages to join and delete from node_list - i = random.randint(1, len(node_list)) - ndi = node_list[i-1] - del node_list[i-1] - - # pick second of two lineages to join and delete from node_list - j = random.randint(1, len(node_list)) - ndj = node_list[j-1] - del node_list[j-1] - - # join selected nodes and add ancestor to node_list - ancnd = node(next_node_number) - ancnd.deep = is_deep_coalescence - ancnd.lchild = ndi - ancnd.descendants = set() - ancnd.descendants |= ndi.descendants - ancnd.descendants |= ndj.descendants - ndi.rsib = ndj - ndi.par = ancnd - ndj.par = ancnd - node_list.append(ancnd) - - return node_list - - -def makeNewick(nd, brlen_scaler = 1.0, start = True): # - global _newick - global _TL - - if start: - _newick = '' - _TL = 0.0 - - if nd.lchild: - _newick += '(' - makeNewick(nd.lchild, brlen_scaler, False) - - else: - blen = nd.edgelen*brlen_scaler - _TL += blen - _newick += '%d:%.5f' % (nd.number, blen) - - if nd.rsib: - _newick += ',' - makeNewick(nd.rsib, brlen_scaler, False) - elif nd.par is not None: - blen = nd.par.edgelen*brlen_scaler - _TL += blen - _newick += '):%.3f' % blen - - return _newick, _TL - -def calcActualHeight(root): - h = 0.0 - nd = root - while nd.lchild: - nd = nd.lchild - h += nd.edgelen - return h - - -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - - i += 1 - - post = pre[:] - post.reverse() - return post - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): - if nd.lchild: - newickstring += '(' - - elif nd.rsib: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - return newickstring - -###################yule tree################################################### -# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree -# - num_species is the number of leaves (tips) in the tree -def calcPhi(num_species): - phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) - return phi - -# yuleTree creates a species tree in which edge lengths are measured in -# expected number of substitutions. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def yuleTree(num_species, mu_over_s): - # create num_species nodes numbered 1, 2, ..., num_species - nodes = [node(i+1) for i in range(num_species)] - - next_node_number = num_species + 1 - while len(nodes) > 1: - # choose a speciation time in generations - K = float(len(nodes)) - mean_epoch_length = mu_over_s/K - t = random.gammavariate(1.0, mean_epoch_length) - - # update each node's edgelen - for n in nodes: - n.edgelen += t # same as: n.edgelen = n.edgelen + t - - nodes = joinRandomPair(nodes, next_node_number, False) - next_node_number += 1 - - return nodes[0] - -# calcExpectedHeight returns the expected height of the species tree in terms of -# expected number of substitutions from the root to one tip. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def calcExpectedHeight(num_species, mu_over_s): - return mu_over_s*calcPhi(num_species) - - -if __name__ == '__main__': - random_seed = 348889 # 7632557, 12345 - number_of_species = 5 - mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species - random.seed(random_seed) - species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) -# print '#########' -# print species_tree_root - newick = makeNewick(species_tree_root) -# print 'Random number seed: %d' % random_seed -# print 'Simulating one tree:' -# print ' number of species = %d' % number_of_species -# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio -# print ' actual tree length =',newick[1] - expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) -# print ' expected height =',expected_height - actual_height = calcActualHeight(species_tree_root) -# print ' actual height =',actual_height - print 'true tree: ',newick[0] - print '**************************' - -# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' - rates_list = gammaRates(alpha) - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) -# try1 = readSeq.patterns() - result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list) From 9368821e7aa6ac32b6968be3d3bbbf0b6a05675e Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:53:36 -0400 Subject: [PATCH 03/11] Delete treeLike_JCGAMMA.py --- treeLike_JCGAMMA.py | 413 -------------------------------------------- 1 file changed, 413 deletions(-) delete mode 100644 treeLike_JCGAMMA.py diff --git a/treeLike_JCGAMMA.py b/treeLike_JCGAMMA.py deleted file mode 100644 index 6a777cf..0000000 --- a/treeLike_JCGAMMA.py +++ /dev/null @@ -1,413 +0,0 @@ -################################## -# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model -################################### - - -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from scipy.stats import gamma -from math import exp, log - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -alpha = 0.5 #gamma shape parameter for rate categories -########################################################################################## - - - - - -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - - - - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - - -def allocatePartial(node, patterns, rates): - if node.number > 0: - npatterns = len(patterns) -# print 'npat', npatterns - node.partial = [0.0]*(4*4*npatterns) -# print len(node.partial) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - for l in range(4): - if base == 'A': - node.partial[i*16+l*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*16+l*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*16+l*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*16+l*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: -# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] -# rt = [2.89442785] - -# rt = [1.0, 1.0, 1.0, 1.0] - - npatterns = len(patterns) -# print 'patterns=', patterns - node.partial = [0.0]*(4*4*npatterns) - like_list = [] - for i,pattern in enumerate(patterns.keys()): -# print i, pattern, patterns.keys() - m_list = [] - num_pattern = patterns[pattern] -# print num_pattern - - for l,m in enumerate(rates): - - - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild - - - ###################################################### - - pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) - - pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 - site_log_like = (log(site_like))*num_pattern - like_list.append(site_log_like) - log_like = sum(like_list) - return log_like - - -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) - with open(path, 'r') as content: - newick = content.read() - return newick -# - -def gammaRates(alpha): - bounds = [0.0, 0.25, 0.50, 0.75, 1.] - rates = [] - for i in range(4): -# print i - lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) - upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) - mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. - rates.append(mean_rate) - return rates - -def prepareTree(postorder, patterns, rates): - likelihood_lists = [] - for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns, rates)) - print 'log-likelihood of the topology =', likelihood_lists[-1] - - -def joinRandomPair(node_list, next_node_number, is_deep_coalescence): - # pick first of two lineages to join and delete from node_list - i = random.randint(1, len(node_list)) - ndi = node_list[i-1] - del node_list[i-1] - - # pick second of two lineages to join and delete from node_list - j = random.randint(1, len(node_list)) - ndj = node_list[j-1] - del node_list[j-1] - - # join selected nodes and add ancestor to node_list - ancnd = node(next_node_number) - ancnd.deep = is_deep_coalescence - ancnd.lchild = ndi - ancnd.descendants = set() - ancnd.descendants |= ndi.descendants - ancnd.descendants |= ndj.descendants - ndi.rsib = ndj - ndi.par = ancnd - ndj.par = ancnd - node_list.append(ancnd) - - return node_list - - -def makeNewick(nd, brlen_scaler = 1.0, start = True): # - global _newick - global _TL - - if start: - _newick = '' - _TL = 0.0 - - if nd.lchild: - _newick += '(' - makeNewick(nd.lchild, brlen_scaler, False) - - else: - blen = nd.edgelen*brlen_scaler - _TL += blen - _newick += '%d:%.5f' % (nd.number, blen) - - if nd.rsib: - _newick += ',' - makeNewick(nd.rsib, brlen_scaler, False) - elif nd.par is not None: - blen = nd.par.edgelen*brlen_scaler - _TL += blen - _newick += '):%.3f' % blen - - return _newick, _TL - -def calcActualHeight(root): - h = 0.0 - nd = root - while nd.lchild: - nd = nd.lchild - h += nd.edgelen - return h - - -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - - i += 1 - - post = pre[:] - post.reverse() - for nd in post: - print nd.number, nd.edgelen - return post - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): - if nd.lchild: - newickstring += '(' - - elif nd.rsib: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - return newickstring - -###################yule tree################################################### -# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree -# - num_species is the number of leaves (tips) in the tree -def calcPhi(num_species): - phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) - return phi - -# yuleTree creates a species tree in which edge lengths are measured in -# expected number of substitutions. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def yuleTree(num_species, mu_over_s): - # create num_species nodes numbered 1, 2, ..., num_species - nodes = [node(i+1) for i in range(num_species)] - - next_node_number = num_species + 1 - while len(nodes) > 1: - # choose a speciation time in generations - K = float(len(nodes)) - mean_epoch_length = mu_over_s/K - t = random.gammavariate(1.0, mean_epoch_length) - - # update each node's edgelen - for n in nodes: - n.edgelen += t # same as: n.edgelen = n.edgelen + t - - nodes = joinRandomPair(nodes, next_node_number, False) - next_node_number += 1 - - return nodes[0] - -# calcExpectedHeight returns the expected height of the species tree in terms of -# expected number of substitutions from the root to one tip. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def calcExpectedHeight(num_species, mu_over_s): - return mu_over_s*calcPhi(num_species) - - -if __name__ == '__main__': - random_seed = 24553 # 7632557, 12345 - number_of_species = 5 - mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species - random.seed(random_seed) - species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) -# print '#########' -# print species_tree_root - newick = makeNewick(species_tree_root) -# print 'Random number seed: %d' % random_seed -# print 'Simulating one tree:' -# print ' number of species = %d' % number_of_species -# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio -# print ' actual tree length =',newick[1] - expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) -# print ' expected height =',expected_height - actual_height = calcActualHeight(species_tree_root) -# print ' actual height =',actual_height -# print ' newick: ',newick[0] - - alpha = 0.5 ### gamma shape parameter rate categories -# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' - rates_list = gammaRates(alpha) - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) From c51f63b08358e5c2ce494388b4370fb84daac6af Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:53:43 -0400 Subject: [PATCH 04/11] Delete treeLike.py --- treeLike.py | 374 ---------------------------------------------------- 1 file changed, 374 deletions(-) delete mode 100644 treeLike.py diff --git a/treeLike.py b/treeLike.py deleted file mode 100644 index 1142e5e..0000000 --- a/treeLike.py +++ /dev/null @@ -1,374 +0,0 @@ -################################## -# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor model -################################### - - -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from math import exp, log - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -########################################################################################## - - -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - -def allocatePartial(node, patterns): - if node.number > 0: - npatterns = len(patterns) - node.partial = [0.0]*(4*npatterns) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - if base == 'A': - node.partial[i*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: - npatterns = len(patterns) - node.partial = [0.0]*(4*npatterns) - like_list = [] - for i,pattern in enumerate(patterns.keys()): - psame = (0.25+0.75*exp(-4.0*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - - pAA = psame*(node.lchild.partial[i*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild - - ###################################################### - - pCA = pdiff*(node.lchild.partial[i*4 + 0]) - pCC = psame*(node.lchild.partial[i*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*4 + 1]) - pGG = psame*(node.lchild.partial[i*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*4 + 3]) -# - pGA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) -# - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*4 + 2]) - pTT = psame*(node.lchild.partial[i*4 + 3]) -# - pTA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*4 + 3]) -# - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild - - ######################################################### - - site_log_like = (log((sum(node.partial[i*4:i*4+4]))*0.25))*num_pattern - like_list.append(site_log_like) - - log_Like = sum(like_list) - return log_Like - -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) - with open(path, 'r') as content: - newick = content.read() - return newick - -def prepareTree(postorder, patterns): - likelihood_lists = [] - for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns)) - print 'log-likelihood of the topology =', likelihood_lists[-1] - -def joinRandomPair(node_list, next_node_number, is_deep_coalescence): - # pick first of two lineages to join and delete from node_list - i = random.randint(1, len(node_list)) - ndi = node_list[i-1] - del node_list[i-1] - - # pick second of two lineages to join and delete from node_list - j = random.randint(1, len(node_list)) - ndj = node_list[j-1] - del node_list[j-1] - - # join selected nodes and add ancestor to node_list - ancnd = node(next_node_number) - ancnd.deep = is_deep_coalescence - ancnd.lchild = ndi - ancnd.descendants = set() - ancnd.descendants |= ndi.descendants - ancnd.descendants |= ndj.descendants - ndi.rsib = ndj - ndi.par = ancnd - ndj.par = ancnd - node_list.append(ancnd) - - return node_list - - -def makeNewick(nd, brlen_scaler = 1.0, start = True): # - global _newick - global _TL - - if start: - _newick = '' - _TL = 0.0 - - if nd.lchild: - _newick += '(' - makeNewick(nd.lchild, brlen_scaler, False) - - else: - blen = nd.edgelen*brlen_scaler - _TL += blen - _newick += '%d:%.5f' % (nd.number, blen) - - if nd.rsib: - _newick += ',' - makeNewick(nd.rsib, brlen_scaler, False) - elif nd.par is not None: - blen = nd.par.edgelen*brlen_scaler - _TL += blen - _newick += '):%.3f' % blen - - return _newick, _TL - -def calcActualHeight(root): - h = 0.0 - nd = root - while nd.lchild: - nd = nd.lchild - h += nd.edgelen - return h - - -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - - i += 1 - - post = pre[:] - post.reverse() - return post - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): - if nd.lchild: - newickstring += '(' - - elif nd.rsib: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - return newickstring - -###################yule tree################################################### -# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree -# - num_species is the number of leaves (tips) in the tree -def calcPhi(num_species): - phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) - return phi - -# yuleTree creates a species tree in which edge lengths are measured in -# expected number of substitutions. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def yuleTree(num_species, mu_over_s): - # create num_species nodes numbered 1, 2, ..., num_species - nodes = [node(i+1) for i in range(num_species)] - - next_node_number = num_species + 1 - while len(nodes) > 1: - # choose a speciation time in generations - K = float(len(nodes)) - mean_epoch_length = mu_over_s/K - t = random.gammavariate(1.0, mean_epoch_length) - - # update each node's edgelen - for n in nodes: - n.edgelen += t # same as: n.edgelen = n.edgelen + t - - nodes = joinRandomPair(nodes, next_node_number, False) - next_node_number += 1 - - return nodes[0] - -# calcExpectedHeight returns the expected height of the species tree in terms of -# expected number of substitutions from the root to one tip. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def calcExpectedHeight(num_species, mu_over_s): - return mu_over_s*calcPhi(num_species) - - -if __name__ == '__main__': - random_seed = 24553 # 7632557, 12345 - number_of_species = 5 - mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species - random.seed(random_seed) - species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) -# print '#########' -# print species_tree_root - newick = makeNewick(species_tree_root) -# print 'Random number seed: %d' % random_seed -# print 'Simulating one tree:' -# print ' number of species = %d' % number_of_species -# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio -# print ' actual tree length =',newick[1] - expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) -# print ' expected height =',expected_height - actual_height = calcActualHeight(species_tree_root) -# print ' actual height =',actual_height -# print ' newick: ',newick[0] - - -# yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' - - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file)) \ No newline at end of file From 306e319a76b19b9bcc8ce22c2caa209d2ac1c1cc Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:53:55 -0400 Subject: [PATCH 05/11] Delete tree.tre --- tree.tre | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tree.tre diff --git a/tree.tre b/tree.tre deleted file mode 100644 index 2d84738..0000000 --- a/tree.tre +++ /dev/null @@ -1 +0,0 @@ -(5:1.8601,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896) \ No newline at end of file From 8d860d53e6fed33382f1f6b27fc596df0fa696a9 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:15 -0400 Subject: [PATCH 06/11] Delete simSequences.py --- simSequences.py | 174 ------------------------------------------------ 1 file changed, 174 deletions(-) delete mode 100644 simSequences.py diff --git a/simSequences.py b/simSequences.py deleted file mode 100644 index 4181da1..0000000 --- a/simSequences.py +++ /dev/null @@ -1,174 +0,0 @@ -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from math import exp, log -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - self.state = None - self.states = None - - - def simulateSequences(self, num_sites): - self.states = [str]*(num_sites) - freq = [0.25, 0.25, 0.25, 0.25] - current_states = [ 'A', 'C', 'G', 'T'] - if self.par is None: - - for i in range(num_sites): - - ran_nm = random.random() - if ran_nm < freq[0]: -# self.state = 'A' - self.states[i] = 'A' - elif ran_nm <= freq[0]+freq[1]: -# self.state = 'C' - self.states[i] = 'C' - elif ran_nm <= freq[0]+freq[1]+freq[2]: -# self.state = 'G' - self.states[i] = 'G' - - else: -# self.state = 'T' - self.states[i] = 'T' - - else: - for m in range(num_sites): - prob = [] - ran_nm = random.random() - - for i in current_states: - if self.par.states[m] == i: - p = (0.25+0.75*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - else: - p = (0.25-0.25*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - for i in prob: - - if ran_nm <= prob[0]: -# self.state = 'A' - self.states[m] = 'A' - - elif ran_nm <= prob[0]+ prob[1]: -# self.state = 'C' - self.states[m] = 'C' - - elif ran_nm <= prob[0]+ prob[1]+ prob[2]: -# self.state = 'G' - self.states[m] = 'G' - - else: -# self.state = 'T' - self.states[m] = 'T' - - return self.states - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - -def simulate(preorder, ntax, num_sites, out): - newf = open(out, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write('dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write('format datatype=dna missing=? gap=-;\n') - newf.write('matrix\n') - master = {} - for nd in preorder: - master[nd.number]=nd.simulateSequences(num_sites) - if nd.number >0: - newf.write('%s %s\n' % (nd.number, ''.join(nd.simulateSequences(num_sites)))) - newf.write(';\n') - newf.write('end;') -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - i += 1 - - return pre - -if __name__ == '__main__': - - output_filename = os.path.join('simulated_output.nexus') - - yuletree = '(5:1.86010,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896)' - - preorder = readnewick(yuletree) - ntax = 5 - num_sites = 100 - result = simulate(preorder, ntax, num_sites, output_filename) - From e205f4efccf756476b1caecdfe953536420d72a5 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:22 -0400 Subject: [PATCH 07/11] Delete readtree.py --- readtree.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 readtree.py diff --git a/readtree.py b/readtree.py deleted file mode 100644 index a7b4943..0000000 --- a/readtree.py +++ /dev/null @@ -1,22 +0,0 @@ -# def treenewck(): -# script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -# path = os.path.join(script_dir, 'nexus') -# for filename in glob.glob(os.path.join(path, '*.tre*')): -# f = open(filename, 'r').read() - -import re, os, glob, itertools, fnmatch, sys, shutil -# dirname, filename = os.path.split(os.path.abspath(__file__)) -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -# print script_dir - path = os.path.join(script_dir, 'tree.tre') - with open(path, 'r') as content: - newick = content.read() - return newick -a = treenewick() -print a - - -# dirname, filename = os.path.split(os.path.abspath(__file__)) -# print "running from", dirname -# print "file is", filename \ No newline at end of file From 6883d2d2d1c7fde7e554be10a6f2e4d2960aa812 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:28 -0400 Subject: [PATCH 08/11] Delete readSeq.py --- readSeq.py | 69 ------------------------------------------------------ 1 file changed, 69 deletions(-) delete mode 100644 readSeq.py diff --git a/readSeq.py b/readSeq.py deleted file mode 100644 index 91c58a5..0000000 --- a/readSeq.py +++ /dev/null @@ -1,69 +0,0 @@ -def patterns(sequence_file): - # - import re, os, glob, itertools, fnmatch, sys, shutil - from itertools import combinations - from collections import Counter - - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -# path = os.path.join(script_dir, 'nexus') - - genes = [] - data = {} -# print 'Reading nexus files...' - for filename in glob.glob(os.path.join(script_dir, sequence_file)): - - m = re.match('(.+).nex', os.path.basename(filename)) - gene_name = m.group(1) -# print 'gene_name=', gene_name - genes.append(gene_name) - f = open(filename, 'r').read() - - m = re.search('ntax\s*=\s*(\d+)', f, re.M | re.S) - ntax = int(m.group(1)) -# print 'ntax=', ntax - - m = re.search('nchar\s*=\s*(\d+)', f, re.M | re.S) - nchar = int(m.group(1)) -# print 'nchar=', nchar - - m = re.search('Matrix\s+(.+?);', f, re.M | re.S) - matrix = m.group(1).strip() - matrix_lines = matrix.split('\n') - - taxon_names = [] - sequences = {} - sequences_list = [] - for line in matrix_lines: - parts = line.strip().split() - assert len(parts) == 2 - taxon_name = parts[0] - sequence = parts[1] - - taxon_names.append(taxon_name) - sequences_list.append(sequence) - sequences[taxon_name] = sequence - - pattern_list = [] - - k=0 - while k < nchar: - site_pattern = '' - for i,m in enumerate(sequences_list): - site_pattern += m[k] - pattern_list.append(site_pattern) - k+=1 - pattern_dict = dict() - for i in pattern_list: - pattern_dict[i] = pattern_dict.get(i, 0) + 1 - - tmp = [] - for key in pattern_dict.keys(): ###convert dict to key of tupules -# print 'key=', key - tmp.append((pattern_dict[key],key)) - - sorted_values = sorted(tmp) ###sorted according to key smaller to larger - sorted_values.sort(cmp = lambda x,y:cmp(x[1],y[1])) ###sorted according to values in alphabetical order - - return pattern_dict - - From a2fe87c36ff1c5c21200cb3028189ea757d1b3c4 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:34 -0400 Subject: [PATCH 09/11] Delete randomSeq.py --- randomSeq.py | 211 --------------------------------------------------- 1 file changed, 211 deletions(-) delete mode 100644 randomSeq.py diff --git a/randomSeq.py b/randomSeq.py deleted file mode 100644 index 4925a91..0000000 --- a/randomSeq.py +++ /dev/null @@ -1,211 +0,0 @@ -import sys, re, os, random - - -# number1 = raw_input('number_of_runs? > ') -# num_run = int(number1) -# #print num_run -# -# ###specifying number of taxa to be generated -# number2 = raw_input('number_of_taxa? > ') -# ntax = int(number2) -# -# ###specifying number of sites to be generated -# number3 = raw_input('number of sites? > ') -# num_sites = int(number3) -# -# ###specifying number of dna matrix to be generated -# number4 = raw_input('number of genes? > ') -# number_genes = int(number4) - - - -num_run = 1 -#print num_run - -###specifying number of taxa to be generated -ntax = 6 - -###specifying number of sites to be generated -num_sites = 10000 - -###specifying number of dna matrix to be generated -number_genes = 10 - -num_trials_pergene = 2 - - -for i in range(num_run): - run_name = i+1 - run_name2 = 'trial'+ str(run_name) - print run_name2 - master_dir = os.path.join(os.path.abspath(os.curdir), run_name2) - if not os.path.exists(master_dir): - os.mkdir(master_dir) - - - ###creating a list for the directories for each randomly generated dna matrix - all_seq = [] - sets = [] - for a, b in enumerate(range(number_genes)): - a += 1 - set = "gene"+ str(a) - sets.append(set) - - ### creating dna matrix with randomly generated sequences - for n, i in enumerate(sets): - ### specifying names of the directory and dna matrix - folder_name = i - n += 1 - new_nexus_file = 'randomseq'+ str(n)+'.nex' - new_nexus_file2 = 'randomseq'+ str(n) - ### creating a list of randomly generated sequences - sequences = [] - for i in range(ntax): - myrandom = [] - for i in range(num_sites): - i = random.randint(1,4) - myrandom.append(i) - dna_list = [] - for m in myrandom: - if m == 1: - m= "A" - if m ==2: - m= "T" - if m ==3: - m= "G" - if m == 4: - m= "C" - dna_list.append(m) - dna_list2 = ''.join(dna_list) - #print dna_list2 - sequences.append(dna_list2) - all_seq.append(sequences) - - - - ### creating a list of taxon_names - taxon_names = [] - k=0 - for i, m in enumerate(sequences): - k += 1 - taxon = "taxon"+ str(k) - taxon_names.append(taxon) - - - ### creating a directory - new_dir = os.path.join(master_dir,folder_name) - if not os.path.exists(new_dir): - os.mkdir(new_dir) - gene_sets = [] -# for i in range(3): - for i in range(num_trials_pergene): - m= i+1 - seed_num = 4648+m - m2 = 'run'+str(m) - gene_sets.append(m2) - new_subdir = os.path.join(new_dir,m2) - if not os.path.exists(new_subdir): - os.mkdir(new_subdir) - - bash_filename = 'qsub.sh' - bash_file_content = '''#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -python runphycas.py -/common/galax/rungalax.sh --treefile trees.t --skip 1 -'''% (new_nexus_file2) - -# # ### saving dna matrix to the directory -# - full_path1 = os.path.join(new_subdir, new_nexus_file) - newf = open(full_path1, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write(' format datatype=dna missing=? gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path2 = os.path.join(new_subdir, bash_filename) - newf = open(full_path2, 'w') - newf.write(bash_file_content) - newf.close() - - full_path8 = os.path.join(new_subdir, 'runphycas.py') - newf = open(full_path8, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(int(seed_num), new_nexus_file)) - -# -# -# - combined_sequences = map(''.join, zip(*all_seq)) - num_sites_combined = len(combined_sequences[0]) - new_dir2 = os.path.join(master_dir,'combinedSeq') - if not os.path.exists(new_dir2): - os.mkdir(new_dir2) - - full_path3 = os.path.join(new_dir2, 'combinedSeq.nex') - newf = open(full_path3, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites_combined)) - newf.write(' format datatype=dna missing=? gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, combined_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path4 = os.path.join(new_dir2, bash_filename) - newf = open(full_path4, 'w') - newf.write(bash_file_content) - newf.close() - - - - full_path6 = os.path.join(new_dir2, 'runphycas.py') - newf = open(full_path6, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(4648, 'combinedSeq.nex')) - - - full_path7 = os.path.join(master_dir, 'submitall.sh') - newf = open(full_path7, 'w') - newf.write('#!/bin/bash\n') - for i in sets: - for m in gene_sets: - newf.write('cd %s/%s; qsub qsub.sh;cd .. ;cd .. ; \n' %(i, m)) - newf.write('cd %s; qsub qsub.sh; cd ..\n' %('combinedSeq')) - newf.close() - - full_path9 = os.path.join(master_dir, 'treelist.txt') - newf = open(full_path9, 'w') - for i in sets: - for m in gene_sets: - newf.write(i+'/'+m+'/trees.t\n') - newf.close() - - - - full_path10 = os.path.join(master_dir, 'galax.sh') - newf = open(full_path10, 'w') - newf.write('#!/bin/bash\n') - newf.write('/common/galax/rungalax.sh --listfile treelist.txt --skip 1') - newf.close() - - - - From 654ed682b27c9401b0ab9d3f6d6a374ea41748b0 Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:54:41 -0400 Subject: [PATCH 10/11] Delete mergeSeq.py --- mergeSeq.py | 453 ---------------------------------------------------- 1 file changed, 453 deletions(-) delete mode 100644 mergeSeq.py diff --git a/mergeSeq.py b/mergeSeq.py deleted file mode 100644 index c28404a..0000000 --- a/mergeSeq.py +++ /dev/null @@ -1,453 +0,0 @@ -import re, os, glob, itertools, fnmatch, sys, shutil -from capture_marglik_pol import capture_script - -from itertools import combinations - -script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -path = os.path.join(script_dir, 'nexus') -#path = '/Users/suman/Documents/Postdoctoral_projects/Algae_data/two_tupules_run/Mar4/combo_script/nexus' - -first_combo = 700 -last_combo = 770 - -# create output directory, recursively deleting it if it already exists after asking permission -output = 'g-700-770' -if os.path.exists(output): - print 'Directory "%s" exists' % output - answer = raw_input('delete it (y/n)?') - if answer in ['y','yes','Y','Yes','YES']: - shutil.rmtree(output) - else: - sys.exit('Aborting because you did not answer "y" or "yes"') -os.mkdir(output) - -number_to_merge = 2 - -#### counting number of genes to be analyzed -number_of_genes = len(fnmatch.filter(os.listdir(path), '*.nex')) -number_gen = range(1, number_of_genes+1) - -#### counting number of nexus files to be written after combinations -combos = list(itertools.combinations(number_gen, number_to_merge)) -number_of_files = len(combos) -# print number_of_files - -# chunks are pieces of the chloroplast that have not been rearranged -# The largest chunk (1) has 7 genes, the smallest (16-23) have only 1 gene each -chunk_map = { -'rpl23':1, -'rpl2' :1, -'rps19':1, -'rps14':1, -'atpA' :1, -'psbI' :1, -'cemA' :1, -'psbE' :2, -'rps9' :2, -'ycf3' :2, -'rpl36':2, -'petD' :2, -'rpoA' :2, -'psbJ' :3, -'atpI' :3, -'psaJ' :3, -'rps12':3, -'rpl16':4, -'rpl14':4, -'rpl5' :4, -'rps8' :4, -'rpoBa':5, -'rpoBb':5, -'rps3' :5, -'rpoC2':5, -'psaB' :6, -'ccsA' :6, -'psbZ' :6, -'psbM' :6, -'psbF' :7, -'psbL' :7, -'petG' :7, -'psbB' :8, -'psbT' :8, -'rps4' :9, -'clpP' :9, -'rps18':10, -'petB' :10, -'rps7' :11, -'atpE' :11, -'atpH' :12, -'atpF' :12, -'psbH' :13, -'psbK' :13, -'psaC' :14, -'petL' :14, -'rbcL' :15, -'rps11':15, -'rpl20':16, -'tufA' :17, -'psaA' :18, -'psbD' :19, -'atpB' :20, -'psbA' :21, -'psbC' :22, -'psbN' :23 -} - -##### nexus file for each merged pair - -new_way = True -genes = [] -data = {} -gene_javascript_data = [] -print 'Reading nexus files...' -for filename in glob.glob(os.path.join(path, '*.nex')): -# print 'Reading file:',filename - #pol m = re.match('(.+)[.]nex', os.path.basename(filename)) - m = re.match('(.+)[-]stripped[.]nex', os.path.basename(filename)) - gene_name = m.group(1) - - genes.append(gene_name) - f = open(filename, 'r').read() - m = re.search('ntax\s*=\s*(\d+)', f, re.M | re.S) - ntax = int(m.group(1)) - m = re.search('nchar\s*=\s*(\d+)', f, re.M | re.S) - nchar = int(m.group(1)) - - gene_javascript_data.append((chunk_map[gene_name], gene_name, nchar)) - - m = re.search('Matrix\s+(.+?);', f, re.M | re.S) - matrix = m.group(1).strip() - - matrix_lines = matrix.split('\n') - - taxon_names = [] - sequences = {} - for line in matrix_lines: - parts = line.strip().split() - assert len(parts) == 2 - taxon_name = parts[0] - sequence = parts[1] - taxon_names.append(taxon_name) - sequences[taxon_name] = sequence - - if not new_way: - if len(data) == 0: - for t in taxon_names: - data[t] = [] - - for t in taxon_names: - if new_way: - group = data.setdefault(t, []) - group.append(sequences[t]) - else: - data[t].append(sequences[t]) - -print 'Found %d genes' % len(genes) -for c,g,n in sorted(gene_javascript_data): - print ' {name:"%s", chunk:%d, seqlen:%d},' % (g,c,n) - -taxa_sorted = sorted(data.keys()) -# print "taxa_sorted=", taxa_sorted - -number_of_genes = len(genes) -number_gen = range(1, number_of_genes+1) - -#### counting number of nexus files to be written after combinations -combos = list(itertools.combinations(number_gen, number_to_merge)) -number_of_combos = len(combos) -print 'Number of combinations:',number_of_combos - -combo_list=[] -current_combo = 0 -for a,b in combos: - current_combo += 1 - if current_combo <= first_combo or current_combo > last_combo: - continue - print 'current_combo=', current_combo-1 - - new_taxonlist = [] - se1 = [] - se2 = [] - new_sequences = [] - seq1 = [] - seq2 = [] - genes_merged = (genes[a-1]+'_'+genes[b-1]) -# print 'genes[a-1]==', genes[a-1] -# print 'genes[b-1]==', genes[b-1] -# - for t in taxa_sorted: - new_taxonlist.append((t,data[t][a-1],data[t][b-1])[0]) - new_sequences.append((data[t][a-1]+data[t][b-1])) - seq1.append((data[t][a-1])) - seq2.append((data[t][b-1])) -# print 'new_taxonlist==', new_taxonlist - ntax = len(new_taxonlist) - newnchar = len(new_sequences[0]) - nchar_gene1 = len(seq1[0]) - nchar_gene2 = len(seq2[0]) -# print 'nchar_gene1====', nchar_gene1 -# print 'nchar_gene2====', nchar_gene2 - - new_nexus_file = '%s.nex' % genes_merged - gene_dir = os.path.join(output, '%s' % (genes_merged)) - if not os.path.exists(gene_dir): - os.mkdir(gene_dir) - - combo_list.append(genes_merged) - - mrbayes_single_run1 = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -partition mine = 3: first, second, third; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1) - - mrbayes_single_run2 = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -partition mine = 3: first, second, third; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene2, nchar_gene2, nchar_gene2) - - - mrbayes_concat_topo_brlen = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -charset fourth = %d-%d\\3; -charset fifth = %d-%d\\3; -charset sixth = %d-%d\\3; -partition mine = 6: first, second, third, fourth, fifth, sixth; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1, nchar_gene1+1, newnchar, nchar_gene1+2, newnchar, nchar_gene1+3,newnchar) - - mrbayes_concat_topo = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -charset fourth = %d-%d\\3; -charset fifth = %d-%d\\3; -charset sixth = %d-%d\\3; -partition mine = 6: first, second, third, fourth, fifth, sixth; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all) brlens=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1, nchar_gene1+1, newnchar, nchar_gene1+2, newnchar, nchar_gene1+3,newnchar) - - -#concat_topo -###########submitallscript########### - submitallscript ='''cd %s; qsub qsub.sh; cd .. -cd %s; qsub qsub.sh; cd .. -cd concat_topo_brlen; qsub qsub.sh; cd .. -cd concat_topo; qsub qsub.sh; cd .. -'''%(genes[a-1], genes[b-1]) - submitall = os.path.join(gene_dir, 'submitall.sh') - newf = open(submitall, 'w') - newf.write(submitallscript) -###########submitallscript########### - - -###########qsubscript for concatenated set########### -# qsubscript = '''#!/bin/bash -# #$ -S /bin/bash -# #$ -cwd -# #$ -N concatenated -# #$ -q highpri.q,highmem.q -# mb concatenated.nex -# rm -f mcmc.* ss.*''' -###########qsubscript for concatenated set########### - - -###########gene1########### - single_gene_dir = os.path.join(gene_dir, genes[a-1]) - if not os.path.exists(single_gene_dir): - os.mkdir(single_gene_dir) - output_path = os.path.join(single_gene_dir, ('%s.nex' %(genes[a-1]))) - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, nchar_gene1)) - newf.write(' Format datatype=dna missing=? gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, seq1): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_single_run1) - newf.close() - ##qsub_script##### - qsub = os.path.join(single_gene_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -mb %s.nex -rm -f mcmc.* ss.*'''%(genes[a-1], genes[a-1])) - ##qsub_script##### -###########gene1########### - - -###########gene2########### - single_gene_dir = os.path.join(gene_dir, genes[b-1]) - if not os.path.exists(single_gene_dir): - os.mkdir(single_gene_dir) - output_path = os.path.join(single_gene_dir, ('%s.nex' %(genes[b-1]))) - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, nchar_gene2)) - newf.write(' Format datatype=dna missing=? gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, seq2): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_single_run2) - newf.close() - ####qsub_script##### - qsub = os.path.join(single_gene_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -mb %s.nex -rm -f mcmc.* ss.*'''%(genes[b-1], genes[b-1])) - ####qsub_script##### -###########gene2########### - - -###########concat_topo_brlen########### - concat_dir = os.path.join(gene_dir, 'concat_topo_brlen') - if not os.path.exists(concat_dir): - os.mkdir(concat_dir) - output_path = os.path.join(concat_dir, 'concatenated.nex') - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, newnchar)) - newf.write(' Format datatype=dna missing=? gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, new_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_concat_topo_brlen) - newf.close() - ####qsub_script concat_topo_brlen##### - qsub = os.path.join(concat_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s_%s_concat_topo_brlen -#$ -q highpri.q,highmem.q -mb concatenated.nex -rm -f mcmc.* ss.*'''%(genes[a-1],genes[b-1])) - ####qsub_script concat_topo_brlen##### -###########concat_topo_brlen########### - - -###########concat_topo########### - concat_dir = os.path.join(gene_dir, 'concat_topo') - if not os.path.exists(concat_dir): - os.mkdir(concat_dir) - output_path = os.path.join(concat_dir, 'concatenated.nex') - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, newnchar)) - newf.write(' Format datatype=dna missing=? gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, new_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_concat_topo) - newf.close() - ####qsub_script concat_topo##### - qsub = os.path.join(concat_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s_%s_concat_topo -#$ -q highpri.q,highmem.q -mb concatenated.nex -rm -f mcmc.* ss.*'''%(genes[a-1],genes[b-1])) - ####qsub_script concat_topo##### -###########concat_topo########### - - -####### go.sh ######### -jobscript = os.path.join(output, 'go.sh') -newf = open(jobscript, 'w') -newf.write('#!/bin/bash\n\n') -for i in combo_list: - newf.write('cd %s\n. submitall.sh\ncd ..\n\n' %(i)) -newf.close() - -####### capture_marg.py ######### -jobscript2 = os.path.join(output, 'capture_marg.py') -newf = open(jobscript2, 'w') -newf.write(capture_script) -newf.close() - - - From 911f3d09ada1a517c4a9e07650100bd9c622e20f Mon Sep 17 00:00:00 2001 From: Suman Neupane Date: Fri, 26 May 2017 11:59:23 -0400 Subject: [PATCH 11/11] likelihood only --- readSeq.py => JC+GAMMA/readSeq.py | 4 +- JC+GAMMA/readSeq.pyc | Bin 0 -> 2280 bytes readtree.py => JC+GAMMA/readtree.py | 0 JC+GAMMA/simulated.nex | 20 ++ JC+GAMMA/test3.py | 398 ++++++++++++++++++++++ JC+GAMMA/tree.tre | 1 + JC+GAMMA/treeLike.py | 367 +++++++++++++++++++++ brnlenMCMC.py | 494 ---------------------------- tree.tre | 1 - 9 files changed, 788 insertions(+), 497 deletions(-) rename readSeq.py => JC+GAMMA/readSeq.py (95%) create mode 100644 JC+GAMMA/readSeq.pyc rename readtree.py => JC+GAMMA/readtree.py (100%) create mode 100644 JC+GAMMA/simulated.nex create mode 100644 JC+GAMMA/test3.py create mode 100644 JC+GAMMA/tree.tre create mode 100644 JC+GAMMA/treeLike.py delete mode 100644 brnlenMCMC.py delete mode 100644 tree.tre diff --git a/readSeq.py b/JC+GAMMA/readSeq.py similarity index 95% rename from readSeq.py rename to JC+GAMMA/readSeq.py index 91c58a5..6453201 100644 --- a/readSeq.py +++ b/JC+GAMMA/readSeq.py @@ -1,4 +1,4 @@ -def patterns(sequence_file): +def patterns(): # import re, os, glob, itertools, fnmatch, sys, shutil from itertools import combinations @@ -10,7 +10,7 @@ def patterns(sequence_file): genes = [] data = {} # print 'Reading nexus files...' - for filename in glob.glob(os.path.join(script_dir, sequence_file)): + for filename in glob.glob(os.path.join(script_dir, '*.nex')): m = re.match('(.+).nex', os.path.basename(filename)) gene_name = m.group(1) diff --git a/JC+GAMMA/readSeq.pyc b/JC+GAMMA/readSeq.pyc new file mode 100644 index 0000000000000000000000000000000000000000..157ab98c73500b6718e5c071b7703a61e33fe14a GIT binary patch literal 2280 zcmb_d%}ygn5Uy^60fYJfGyltKC9_Cy+(XC`0#T$$a4?D3L?CPMG-k|r#-46(j5Koz zd4oJiULl9PMvi%cM9MMwss^(`${~j}bh)Owe!lvudtCUVT>Aabp>G@X_)+-&49k`w ze4-zTzJsFZysBXlzn4&oSjSlQuprxT4T-WRT~VeujP05-J);K*X6O;~9t4#h7RUQ=KKO3VaAeAV9 z9H;(II@Q$DgMdj&$H*V12hGkWS5{}pLDZTezc_H3rgVb0;gpprEmLcT{1M5Hc+{Vz zbdn#Hn`2jhlwDi%^njXEl5T--yct0LHddzM-&lve1V`v(2LE7#+ zec?8whfPigx09yW+OR@h4a7l)a#IOX|S$ z1y!(a`}0J@s4?zw@1(|7FAzDI7Xcq z`HHP$FwHQ&c4JTi@ap0?w2q0siQ1lP+)B&#-a3BY+1s8A!WuMk;}DHF3{68ORO4$ZH!x18|{ia78a^z8BY497K+Xn3rPM*Pb;J#pY%b_c~J9>vT-yOHd3= z6mB?&WxNDP8NsQ@b3u*VlH6jZom}gLfs?%uIVRzMWWh8y@I{iuiO^so(4BbBND)71 zLQxg=oGYti~WWV4KscdO|Jfv;0kPj4B^m`MCWYv}3;) Lnzzin-2Q(6+d|q} literal 0 HcmV?d00001 diff --git a/readtree.py b/JC+GAMMA/readtree.py similarity index 100% rename from readtree.py rename to JC+GAMMA/readtree.py diff --git a/JC+GAMMA/simulated.nex b/JC+GAMMA/simulated.nex new file mode 100644 index 0000000..af9f492 --- /dev/null +++ b/JC+GAMMA/simulated.nex @@ -0,0 +1,20 @@ +#nexus + +begin data; +dimensions ntax=10 nchar=10000; +format datatype=dna missing=? gap=-; +Matrix +1 CTTTCTGGGTGACGTCGCGCGAAGCTTTTGCCCTTAGCCCTGAAACATAGCGCGCAGCACATGCATGTCACCGGTTTGTCAACCCGCCAGTGCTACTCAATGGGTCATATCGGGCATGATCCGCCTCCCTTCCCGGCCATATTAACGGTCTTGAACCTCATTTTCGCGGTACGCGCGTCCACGTTGTGATTGACAAGCCTCCATGCGGGAGCACCTAACATACGTTAGCCTGATCCATGGGACGTAAACTGCACTACATCAGTTTTGTGGGTCTCAGTAGGCGGCTCTGACAGTAGATTCCCTGCAGTACAGATCTCTACGCAACACATAGAACTAGGTTGCTGGAAGTTGAAAACGTTTCGACTGCCTGTACAAAAAATTCGTCGGCAATACAAGTACCTGTAGTTGCCTTGATTGCTCTCAGGTCACATAGGCTTGATTTTAACCAGACTGACGAGTGGGGGCCTCGACGCTGCTATAAAGTGATACATAAGCTCGGAGCAACAAATCTTCAATCGTTTACGAGGGGGGGCCGGCGCAAGCGCGATCACTCTTCCTGTGCCGTATATAGCTCGGATAGGCCCAGGCGGCTAACTAATCCGGTTACAATCAGTATATAGGCAGGGCACGGCACGTGCGGCCCGTGAAACGAAAAATACCTCAATGCCATGGCACTCCTATCTTTGAGGCAACTCGCTGTATGCTATTTTGACAACTCCTCCACCCTAAAAACCTCTTCAACGTGAAACCGTTGCAATCGTAGAAGTTCTCTGACTGTGATGAGAATGCCCTGGTGCAACCGTTTGACTAAATCATACGGTGCTGAAACTGCACATGAACCTCTGGTATCGGAAAGGCGAGGAACACTCGGTATTCGCGCAGTGGAATCGTCCCATATCTATATAGTTAGGCTGAATAGAAAACGCGGATACTCTTTGTCCCACTGAGATTGGCCGGATGCAAGCGTACTCTTAAAGATTTACGCATCTAACTGTCTCTACCGGGGGATATCCCCACACTAACTTTGACAAGGACGTCTAGACCTAAGTAGGGATCTCCGAACTGTCCTTCCTCTGTCCGTCAATCCCGGCATCCACTCTAACGCCGACAAGCTTAAAATTGATATAATAGCGAACCCGGAATTAGTGTCGTCGCCTCGGGAGGCAGTTATAGATTAGTGATTAGCCACTATCGCGCCCGTTTGGACAAAAAAGACGCATAACGACCGCTGAGCGAGATAACATATGCTGCGCCCGACGCGCGAACGTGCCATAGCCTGCAACACTATCGTGGTAGTCCGTATCCCTTCCGTGATCCCCTGGGCCACTTACCCGACTACCTCCGATGTGTCCCCCCTCGTGCAAGACACCGTCCATAGGCACGTTCTGGATTGCTCCGTACGACCCGTAAAAATCAGTGGGACGTCCCTAGAGGCAGACATTCCGGGACACGGTTTCTAGAGCGAAAGGGGGTTGTTCTGGAAGAGAAAATAGGGACCTCATATAAGTAGGAATAAACGGTCGACGTCCGAATGTGGTCACCTTGCTTTAGACGCTGTTGTCACTTAGCAAGTATGAGATGGACGCGAAGTGGTCAACTACCCGGTTTCCCCTGACGGGGTACGTTGCGAAGGGAACTCTGCTGTAAGCAACAGCTGTGGCGTTAAAGGCAAAGGCGAAGAGTGGTAGAACGTACCTCGTTCATCGACTTACCGAGGTTTTCTAATTCTTTGCAATCATGCTTATTGAGGTTAAAGAGTACTCACCCGTTGTACTCCGGTTTTTCCTGTAATGTTTCCAAATGGTTAATGAAACTAGCCGCTGCGCCGGCCTAGTGATCCCGAGCCTGGCAACAGGGAACAACCTTAACGGGTGACCAGTGGGGTGTCTCTGTGGATATAGTCACCCGCCTAACACGCCAGTGATGGAGTTTCTAGCTAGAGACATCTTCGGCAGCTGGTATAAGTATGTGATCTTGAGTGCGGTAATTACGAAGAGTTGCGGAGGAATCCGTGCGTTGCCTTTAGGGTTCATCGAGAGCCCTACCTCCAATTGTATCTTACGCGTCCCGCGGGTTTGTCAACCTACGCAGCCGTTCATTCTCGGTCGACGACGGATATAGGCTGGAAAATTATCGAGAGGCTGGCCCAACTGAGCTCTCGTCACATAGGACTCGGAAGATGCGTGGATGAGCTTAGACTTAGCGAAGTAGCTGCCTTGTTGCCGTCAAGGATATTGCATCACGCAAATACTACGAGACACCCGTACTCCAACCGATCGATACTTGAAGATTTGTCGAAGGTCAAGAAGATCATACACCTCCTGGGGCACTAAGCCGGGGATTAAAGAGCTGGCAAATAACTCACCATTAAGCGATGAGCGGCCCAGGTCCTCCCTAACGTTGATGGTCTGGGGGTTGGGTGAACAATTTGGTCCACGGGGACTTTTGTCGGTTTTCTCAATAGGAAAACTCACCCACGTTAACGCTAGCCACTACCAAATATGCGGGATTTACCTGCGCTGTGGGTAGCCAATAGATGCACGAGCACCGTATCGAACCATCAAGCTCAAATCGTTCGAACAAGGGAAGTGTAGCGTTGAGCTAGTCCATCTAATCATCAATATACAATAGCCGGCAATCAACTGGCCTTAGCGGTCAGACAGTCGAGGCTGGTATTCGCATGTCAATCGAGGTCAGACCACGAGGTTGCACTGTTCTGCTCAGCGGTATGGATCAACATGCGATGTGCGCCATTCGTAGTGCCGCTTGGCAAAATCAATTTCTCTCCCTGTGCTACGTATGTTCGCAACACCTTGGCCGTAGTCCGGGTTTTTTGACGGCTGGGAGTTCTCGCAATGGACATCCCCGGGTTCCCGCCACGCCTGGGGTCTACAGGGGGATCGCGGTCATGCTTCAGAATTCCGCCTACGTGTTCTAGCAATTATGAAATCAACCAATGGCCTTTTACATGTCGGTGCTCGAATGTGTGGAACGAAGTTGGCTTGGTGTATGGATCACCCCACTAATCTTTTAAGAGATCCGGCTTAAACCTCACTCCTCTTTGTTGGCGCAAGCATAGTTAAGAATTTGCTTTGTGCCTGTGCCATTACGATTGACATGGTGGGCCCCGTATAGGGTCGCCAAAAAATTGTGAAGGCGAAGAGTCAAATGCCTTGTTTGGAACTCCGGCCGGGTGAAGCGCACTCTTGGCGATTGGGATCCTGACCCGGATCCCCAGTTATGTTACTTGGTCGATTCGTTTCTGGTCTTTTCACGGCGCCCCTATCTGAGAAACTTAGTCCATCGTGCAATAGCACCTTACACGTTGTGTCGTTGCCGGTGCCGTTGTTTTATTTTGGGGGCCGAGGGTGTTCCCAGGTGGATCAGTACCGAAAGTCTGTCGATACTAAGGTTAAGGGGTAGACGGGGATTGTTTATTGACAACGGTTCGTCCTTCGGGATTGCTTGATGTGATAACGTCTGATTCGTCCAGTATATCGTAGGTCTCACCGCTCCACGGTACTGTCTGATTGAGAAGCTCGGATTTTCCTGTTGCGCTGAATCAACTTCGAGGTAGGCGCGCTCATGCGTACAGGTAAAGAATTTTGCCTAAAACGCCTGCACGCGCTATCATATCACTCTGAAATAGGTTTTCTCGCCCTGAAACTAAAGTCCGTTGACCACCCGGAGAATAGGGATGCCGTATTATTTAGGCGGGACCTCTGGTATGAAACTCTTAGCCTCGTTCTCCACGGTAGATGAGTCGGTTCTTCATAGCCAGTGTACCGATTGCGGCTTTATTTGATTGGCATACGGCGGAGTTCTTCGTAAGCAGCTTATGAGCGCACTGTGGTATAGTTAAATGGATGTTTCCCGTCTCCATGATGTTGCTGTTCCATAGCGCTACGGAATAGATTGCGACGGTCGCCTTGGGGATTACTCATGTGTCACTTTGAGGTGAGCGAAACGCACAAGGCCCCTTGGGGCGCTAATTTTATTAGGCCAATCATCCGGGTCGCTGAGAAATTATTCGATCACGTATGGAGGTCGTAGGTGTACAAATTAACCACCGGGCTAGCATACGATCACCAGATTTTACTCAAGACTAAACTTCTTTAATGTGGGTCTAAAGTACGAAATAACCCGTGGTCTATCAGGAAGTGCTGTTTTAACGTATAGCATCTAATTTTGGACGAGCCTAATGTCGCCCGTGACTTAATTAGTCACAATACGTACGTTATGACGCGTTGACGGCTCCTAAGGTAACCCCTAGCCATTCGAATCAGAGGAGATTGTAGGCTAGACCCTGCGTTGGTTGAGCGTCCACCTAAATCTTCAGGATGTGTTTAGTTATTTCGACCGAGTCCAATGTAGCGCCTCCGACGGCTTTTTTGAAATTCACTCCGTCTTGTTAATTGCCGTAGATGGCTCGAAAAAATATGCTGAAGGGGGGCTAGCATATTCGCTAAGCCTCATCTGGTGTGCCGAGTAACGGGTCCATATAAAAACCCCGCTCCTATTCGTTAATGGGAGCTGATCCGCGTCGTTATTGTTCAGCCTCCCTACCGCAGTGGGAATGACTAGTTAGAAAGAGTGAAGGATTATTACGGCCATGTCTATTTAAGTTGATCCATGGATATAGTAAGGCTATTAACTACGAATTGCAAGTTCACGCAATGAAAAGTAGCGCTTGTATCGCAAGTCATGGGCTAAGCTTAACGGTGACTCTATGACAGCGTAAATACTGGGCGCAAGTGGTTATCGATATCTCCTGTAATTAAGACATTGGAAGCCTTTGGGCTATAACCTACTAGCCCTGTGGTTTGTGCACATAGAGTTAGACGGCGAGCGCAGCCGATGCAATGATCAATGACTCGACGGCATTTCTCATCCAAAAGTCGCCAAACCGACACTCCACTAACAATTGCTAAGGAAAGAACGGGATCATGGTTTCGCCATTATCCGCATTGCGGATTCACGCCAGTTGTTATTATTTTCCATGAGTACGGGCCGGAGGCTAATACTTGAGCGGATGTACACGGTTGCGAGTTGGGGCTGCTTACGCCACTCGTATCATGCGCGGGCGCGGATCTATCGGTGGCTACGTAGTTGCTTAGACCTATAAGGTTAACGGCGACCTCGTCAAATCGTGGAATGCTACAGGACCCGCAACTGAAAACAAGCCGACACCGGACCAATCGAATAACGGACTATCGTGTGCATTTACTCGGTGGCAAGTTCGTACACTTACAGCGAGATATAAAACGATCTGGCGTTGACTTAATCCCAATTGGCCGAGCCGCAGACCAATTCGGCCTTCGAACGTATCACTTTGAGGCCAAGGATACAAGATCACCCGTCGTTTAAGCAGAGCCACCTAATGCTTTAAGAATATGGAGCCTATTCACCTTGTTATTGCGTCTGCCCATTCCCTACTATCGTTTTGGGAACTGAGCATTCGCCACCACAAGCTTCAGTGCCGACAGCACGAGAATGTACGTGGTGGCAAGTCTGAAATGAAAGGTACGTAGCGGTATCTACGCCCGGTCTCTGCTTGCTCTATACATTTGATCCATTGGCGGCGGCGACTGTAGCTGAGCCGAGGGATGTGCTATCGGTTCGCATTCAAAACCTGTCATAGCGCGCGACAGTAACTGGGTTCACCTCGTTATAATTGGGGGATACGAAGCCCGCCCTATAACATGCTAACAACGTAAATAGTGGAAATGTGCCGCACGTAAAGGACAATGGGAACACGGCAAAAAACGAACGGTCTTAGACACTGTTGCTGTGTAGGCTCTCCCCCTTTAATAATGTAGACTCCCTTGTCCGGGTAGCACAGCATCCAAGGTCCGTGGTCTTCTAGGCTAATTTCGTGAACCGCCGGCACTCATATACTGGTAATAGATTCGCGGTTCTGCGCCTAGGACATAAGGTCGGCGCGCAAACCCTGTCGCACTCGAGGATCACCTTAGTTGTTCCACATGAGACCGCTCCCAGTTAGAGTGTTTTGGAAGCCTGGATCATCTGGCATGTGCGCTTGGGGAACAAAGCCCACAGCAAGAACTTTAACAGCTGAGACTAGATGCAGAGGATTAATTGTCCCCTTAATCACTTACTTGAGGTGGATATCGCCGCAGATCTCTGGCCCAACCCTACGCCCAGCATCCAACTCTTGCAGGATTTATCCCTGTTCGGTGGATGACATGATCTGGTTCTATAATCCGTCCAGCGTAATGGACACGAAATGAATAGTACGGTTGAAGAAAGCCCCCAGGAGGGTGTAACACAGGTTACGCTCGTGTCGTCCTCCGTCAAAGAATGCTCCGCAGATCTAATCTACTACTAACAGCGCATTGACCGCAATTCACAGCTTCACGGGTTGAATCCGGTTATTCACGGTCCATTTATTTCATCGTGACCCCACCGGCCGCTAAGCTATAGGGCGGCGGAAAACGCTCCCAACCAGGTTGATGGGGGCCGCTATAGAGGATTATGCGATGGTGTTTGATTTATACCAACCCGTCCGGTTTTGTAATGGTTCCGATTGCACGCTCGGCGCAGTGGATAATACCTCGTAGCTCCGCCAATAGTGTTACTAAGTTATAGGGAAAGACGGCGAGCATGTGGTAGGCGGATAAGGTCCTCTAGAAGAGCTACGTAGCCGTGGTTCCCCGGGGCCGGACTCCGTGCCTTTAGCTTTTGACGTTTCGTTGCGTCCTGCATCGATTGTACCAACTCGACAGCCCAAATCATCAGCGAAACGGCGAGTCTATACCTATTACATACGCGGTAAGCGGACCCTCAACGAGCCAGCACGATGAAACATCAGTCTGTTCGAAGCGTCTCGTTTCTCCAGGTGAAGGTCGCACATATGAAAGATACAACATTGCTTCATTTTGCAATAGATGTGATGTCCACTGTAACCACCTATTGCCGATGCCTCATACGTTCGGTTACACCGAGATTACTGCGACTATCGGGTCGGCGAATATGCGATTCTCCAGTGGATCTTTCTGATACGCCCTTGAACGCGCATCTCCCTTGAACGAGGTAAAACATGGTTCAAATTGATTTGCTAATGAACTTTCTGAGTAAAGCATAAAATTGCCGTACCCGCAGCTCCCCCTATGAATGCTCTACGAATGGGGTAAAGCTCTATACTCCTCTGCGCCCGGCGATCCTAGACTTTGCAAGGAGCTGGCTAAGATGATAGCGAGGGTCCTTAAAGTAGCAGCTACCCATCGGTAGCGACAAACTCCGTCCCGTCGATCAGACCCGCGTCGCAACACGAATATAATATTCCACTTGCCCGTCCGCATTACGAGTCCGACCACCCGGAGAGAGAAGTGGGGTCAGCGAGGGGAACGTGTGTACACTTCCTCGATACTTCCTCTACAGTGAAGCATATCTCGCTCTAAGGCCAAATGCAACAAGCTCACCCACAAGGGTCTTGACTTCGCTGGGGCTTGCTGAGGGCCCTTACACTTTAGTACGAAACATGCGTCTGCGACAAATTGATGAAGGACCCTATGGTCCAGTTCACCCTTCAAAATCTACGGTCAGTGTCCGGACCTTCCGCCTGCTTCGCATAACATCCATAGTAACGATAGCTACCAAGTTAGTTTCCGGTCGTATGAGGGGATACGCGTGAAGTTAGAAACAAATGTGCGGTTCCCGCTATCAATCCCAAATACTGTAACCCTGAGTACCGTAGGCTGAGCGTCTACTCTAGTATCATCGGGCGTGCACTGTGATAAACGTTGGTGGAGCAACCTTTATCTGAACATATAAACTTCGCCACCGGCCTTACCTACCATCGTTCGTACATAGAACCCATTAGAATAGATGTTCTTTGAGGGAGTTACCATGTCACAGTTACACATAAGGACACTCCGGAGTTTTACAACTCAGGCTCGGTACCGACGGGAACCTGTGGAACTGCGTCTAACTCTGGAGCAGATCCGTAGTCCCGCGGACAGCGCACTGATCTTGACGCTAAGTGAAATTGGTAGCACGTCGTATTCCCGGCGTTTTCCAATATTGCAATCATCGTTTGCTCGGTGGAATCAGCGACGGAGGCGTATTTCCAGATATATTCAGCTCAAGGGAGTGGGTGGCTGCGATACGCATGCCGGATTTACACATCTGAGCATTAGGGAATAAGGTAGGACGATCAGCACTCTTGAGGTTCCGAACGTATTGGGATTCCGAACGAAAACACGAAGTTAACGCGGAATCAGTCATAGACTTCCCGAGTTGCCAGCTGTGGACGCACTGCCCACGTTAAGACATAGTCGCGAGAAGCATAGCAGTACAGGTGCGACCGACTTTTGTACTCTTCTCTGGAGACGGTGCAAAGACGGCAACCGACTGGTCAGCCCTTACGTTATCGCCTCAACATGTATAGGGTGAGCGAGATCCTTCATTGGTTGTCACGGTACATCTCCCAGAGATTGCAGTTCAGACTAACACGAAAGTGCATTGTACTCAAATCTTATCTCAAGAGCCTTCGGGTTTATTCATTGCGTAGACGTGGCCCGCTGTGCTCTGTAGTTCAATCCGTGTAGAATTACGAGAAACTCCATGTTCCGTGACCGTTTCCCTAACCCGGCTATTCTTCGCGGCGCCGTTCATCCTTGTTGTCCCGCAGCTCGTTTACTTGAGTATTTCCTGCGAGCCCAAAGAAGTCCATGAGAAATTCATAAAAAAACTGAGACTACTGGCGGATCAAGCACCCATAGCTGGCGATAGCGTTACGGCGCTATACGTTTACGGGGATATTGAATGTATCTTCGTAGTCAATCCAAAACACCGACGATCACGGTCCTAATTATGACATCTTTGTTCTCCTAGCAGTGATTTCTATGAGCTTTTAGGAAGAGGAATCGATTGAAAGCAAGCAAGTGGCTATCCACCAAAAACCGGAATGGCGCAACTATTTAGCACTCACGGCTGTGAAACAGTCGACACACACTCTTTAGTCAGCCATGAGTTTCTTCTCTTGCAGAGATCTCAGCCGAAATTGCGCTCGCCAAAATATACAGACTGACGACCAAAAGAGTCTTGGTGTTGTTAGCCAGTTAGTTAGAGTCTTATAGACAAGGTTGCTGGTAACCCTTCTAACGTCCGGCGCTTGGAGGACTATCAATCACGTCGTATCCTATGCGTTTTCACGTACGACGGCCTATTAAAATCCTGTTTAGGTTTGTAGGTGACTGGCGACCTCTGATTCCCAGCTCTATAAAGGTTGGGTCAACGTTCGCAGTCATCTAGTCTGGAGTAAAGCATCCATTTGCTATGAAAAGCCTCCTATAGTGTACCCCACTTGCCTCGGTTAGACCTCTCCAACAATTTCCTACGCTCGCACAAGAGAGAAGCATCGTCATTGAGAGTGCGCGCGTAGGGCGTCCGACGCTCGGCTGTATAGAACGACTCCGTTGCCGCTAACTAGCGCTTACTGGGCAATACATACCATTGCCCATTTGCCGCGTTCGCACTCCTCAATGCTTTAGTTACCTGAGTCATCATACAAAGCTTCCTAAAAAACCGTCTTGAAGCCAAAGCTAAGGATATACTCCTCCATTTAGTTACTTTTAAATCGGCTACGTGTTCCAAGTATATGTCAAACGTCGGATATGCGTCACGCATTACAACCCAGGCTAGCCGAGTATCCTGATGAGCCTGCCTGAGTGCACGGCCCTTGTCACGGTATGCCCTATCCTGAAGATATGACAGCATACCGGTGACTATATTGGTCGCGCGGTGGCAAAAGGATGCTCCAGCAGTCGGTCCATGATTTTATCGACCGCTAGGTGGGTGGCCTGGAGTCAACATCCAATAGAAATTCGGCCTCGCATGCCACTGGTCTCAATTCTTCCATCCCCACTTAACTCCGTTAAACATTGTCTGTAGGCGCCAGGCCCAGCTCGAGAGTCGGTGTCATTAACGAAGGAAGGTTCTTGCTCTAATCGGCTCCCAGGGACCAAAGTCCTGGATG +2 GCATTCCGTAATGGGTGACGGCGTAGCTAGCCTACTACAAGGACCGTTTCTAGGGGCCGGGAAAAGGGTGACGTCCCTCGCTCTAGACTGTGTGTAAAGGGTCATGTCATTAGCCTAATCTATGTATGCACCAAAACGAAGCGTGGGGTGTTTGCGGGTCTCAGTGGGTCCACAAGACCCCTTTTGGCAGAGTCAAGTAGGGTGTGCAGTGCACTCACTGTTATCTAGGGAGCGGGAACCCCTTATGATGTCGTGCTTTAAGTTCACGATCGAAACTGCTGCAGTCTCATCACAAACCCTCCCCTGTGCTGAACAGGCAGACTTGTGTTCCATCACCCAACGCTGCGAACCCACGCAGTTGGTATGATAGCTCCATCAACAATTAAAATTGGTGACGTCGTTTCCGGCCGAAATTAGTGCATGTAGCCGTGGATCGCAAAACGCCGAGCTACCACGCCGCGACCCGCGAAACGATGCTTCAGATGAGAGGGTTGTTCGCCCGCTCTCCGACCGAGCAGAGCGTACAGGCTTTATTTGACGACTAAGTGTGATGTTTGCGGTGTTGTCCCACCAAGTATACCCTACGTGAGAGGACCCAAAGGCCAGAATTACTGAGTGATTCACAGTATAGGAGCGCTTGTCTCAGTCGAAGGTGGACATAATGGTACTCGGTACCTACCGCCTAGAGTGAACAAACCCGGGGTCCGATTGTAACGGGCCGATTATAGATTGCAAGGGATCTATCCTAGCCAGTCGTTTAGCCATGAGGGGACAACGCCTGGACATACATACCGCCCGTGTACACGGATCAGTACGCCGATAGACTTTCTAATCCGGAACTAGCTAGGGTAAGGTGGTGCTATTGACGATTCCGTCCTTTGGTGTTCAGAGCTCATTTAACTGGACAGGGCTATAATTGATTCAGTGTGCGTCTAGAGTTCGGACCCCCCCATAGCCGGGGCAATCCTTCTATTCGCTAATAAACACTTCATGTCAGCTTACCGCGAATAGACTGGACTCACCGGCCGTTTGTACGGTTCGTCGCTTACAGGCCTGCTTCGTGCTCGTGGCACTGTAACAAGATCCTAGAAACTTCAAAGGTCTCACGAAAGACACACTGATCTCTGCCTCTGTGCTGGCGCGTCACCAGAACTAACAATAATGACCACGTTATATATTGAATAGACACGACTCTTACCAGCCATTGACTCGTGCAAGAACCTGCGTGCTGAGTTTGCAAATGAAGATCTCAAAATGCGAGTTCCTAAGGTGTGTATGACGAAAAGAACTCCCTTAATGCAATTGATCCAAGGAGTGTATGTCGCCGTAGTTGTCGAATGTGTCGCCCTGCTAGCGCAGAACTAACCTTGGACCGCTCTGCTCGACGCACGTTTTCGATACAGGGCGGATCGTAGCTGACGCTCTTATCTTGCTCACGTGAGATCATTGGGTCACTGAGAACGATCCCACGGACACTCTAAGAGGAAAGCAAGGCTAAGGGGCTGTATTGAAGGGCAGGCTCAGTGAATGGGCGGCTGGAGACCGTTCACTATCTCCATGAGAAAAATTCCCGGGATCCTGGCCCTAGTCGGTGAATTATTTTCGTTACCGGGCAGGTTTCGTGCAGCCGTGCAGCAAAAATTGTCTTCGGCGATCAGCTATACGGGTTAGCCGTCCGTGCATTGACACATTGAACTTCGAGCTCACTTATATCATGCACTGAAAGCCTCTTCGAGCAAAAAGCTTGGCAAGCACTTCATACATTCGCACATCTCCACCTACTCGGTTCTCAAACCAGTACCCACCTTGCCACAGACGTACTTGAATGGAGCCCCCATAGAAAGGAAGGTTGGAGGGTCAAGTAAATGTCATGTTGGTCAAAGGACGGGGCAAATGATTGTGGCGGTCTCGCAGCGAGCCTTGCCTAATTGGGCACGCGTACTTGAACAGCTGATTGCGCGCCCAGGCGGCGGGAACATCATTACGCCTTTGGCCTGATCTTCTATCGGGCTACTACGAGACTGGGAAGTCATATCACAGCGATTGAGATCTAAGAGGCAAAGTTTTGGTGGTTAATCAGAGAGGGAAGAACAGATAATATGTTCCCGTAGTCTACAGGGTGTCAGTAAGACTATAGGTATGTACACGGTATGGGTACGGACTTCCCGCTCAAGATCAGGATCCTTTCCTAGGGGTGCAATGCCCGGCTAGGTCGGTACAAGGCCAATCAAGGGTCTGATGAAATTGCGTCTACACCGGTGTGTAGTTACGAAATCTCTAGGATCAGAGCCGAACGAGCTACAAACTAGGTTTTTGTTATCACAAACACTACCCCAAGCTTTCAGCAGGTTTTGAAGGTGTCTGTAACCGTATGGAGGCGCGGGGAGGCAATCTTCCCTCTCTTTGGCAAACGCAATACCAGGGAAAGCCAGCATAACGTAACAAGTTTTCCCACCAGCAGACTTGCCTACCAACTCGAAGGTCAGTCTTAGGTTACGCGCCTAGCATGGTTGTTATGTATCGTAGCGGCTGATTCTAATAGACACGATTAGTGTTGCCCGGTTTCGGCATAACGCCGCACGGATCGAGGAACTTCCGGATCCTCCGCGGTCGCTATGCTCACGCCGACGTCCGGTCGAAAAAAGTAGGGTTTGCGGATATCTTCGTCTGCGTACTATCACCTTGGCTATTGTATGCGTACAGCGGACGTCAAGCCGTTACCCCACGATGTTACTAGCCCATTGGGCGTCGTCACTATTTCATATCCTTAAGTTACTAACCGATAGCGTGAATCCGTCTAGCGCGAGTTCCGCACACTATAAGCCTGACTTGACGGCTGTGCCAAGCACGACCGGGATAAGGCGAGCTAGAATCAGGGGCCACGGTTAGCCAGCTTGTACCGAGGACATGCGGGTCGACCTTCCAGCTAGTCGTGTGCCAACGCCGCTGACTCGAAGACCGCGACCCAGGGGCTAATTGTACCAAATCGTAGGCTATCATACTGTATCTCCCTTCCTCCCAGCGAGCCGGCCCGAGTATTTACCGACGCCGTAATGAATCGCCAGGCGCGACCTTGAATAGGCCTGCATTACTACGCAGATGCTCACTGAAACTCTCACATATACTCGTCTCCTAGCTGAGCTATTAGGGCAGAGCGGAGTGTCCCCCTACAAAGTGGCCCCTTCTGAGGACAAGCCTCGGAATGCCCTTGATGCACGAATCCCGTTAATTCACCGAGGAGTTAGGTGATCCTTTTACAGGTGCTTACTGTGCGGTTGGGGTGTGCAACCTGAAAAGAGCGTCGCACATTAACGCGATAAGTACCCACGCTGAAGGTACTGATAGTTGACTTTCTGTAGAATCTAGATTTAGACGATCGCGTCAACGTCCACCGGTTTGTATGGAGTGTCAGCTACGTTGATCAGTCTAGACATCTTGCATGATCCTCACGAAGGTTGCGTCCCTTATATCTGAGCCGAACCTGTAAACATCGGATATACGCCGGACTGACTCATGAGCGTTAAGGTGGCACTTGGTCCCCAGTTGGTAATGTATAGCGGCCTGGCTTGCATCAGATCACAGCAAGGTTGCTTAGCCTCAGGGGTGCTGTACTACCTACCTCAGGTCAAAACAGCGCTTAATACGTCGGTGAGTCGGTAATAGCTTAGACTTAAGGTGCAGTCGCCAAACGCAAACGCTGTAGAGGCGTCGTGGGACTAGGTCTCTAGACTCGTGATCTTGGATTGCACCCTTCGCGATGGGCTCGCAGCGAGCGCGTATCGACTGCGAGCTGTATGGACGCCTATGATCTTGATCTGCATCGCGCGCTTTAGAGGGAATTCCGCACCTTATTGTTCTCGCCAGCGACCCTCTTCTATGGTAAGAAGGGTAGACTTTGTATGTTAATGAGCAGAAGATAGTCGGACAAGCGATATTACCAAGGAAGACGTACTCGTAGCCGAAAGGCTCGCGCGGGATCTCCCTAACATTCATAAGACAGAGTACAGGATTTCAATCCGTCTGGACATACGACCAAAGCCGATATGAACTGGCCAGAGAGAATCATTCCCCTCGTACTCCCTGCGGGCTTATCAATACCGATCTTAACAGCGCCATAACCAGGCAGGTAAGTATGGGTACAGTTCGATCCGCTTCCGGGGAAACCCCGGAGCAGGATCTGTACCCATCAAAGTCCTCGTTAGGCAGACGTGAAGGCATGGACGCTCGCTGTCAAGCATGGCGATGCGTCTGCTTTATCAGTTTGAGAGCCCCGCCCTTATAGACATATATTGCGTCGATTCGTCCTCGGCGTGAACCGACACTTGAGTCGTGGCTCATGCTGTCCGAGCCGAACTCGAATCCGCAACGAATCACAGAATTTCTTGAACGGGCACTACCCCCAAGCCAACTTGTGGGTAACAACAGGGTAGCGGTTGAGATAGTGGTGCCGGACAATGCATTTAGACGGAGGATCTGCCTTCTCGCGTTGCTATGTACGAACACGTGCGACGGAACAGATTGTACTGGCAGGTGTATTCGGTTGTGTGCAAAGCGTCGAAGTGGTCTTAGCTATGACACTTGCAACTAGCCGATACACAACAAGCCGGAGTGGTCAGACTCAAGTAATGAGCGTTGTTGCCCCGTTGCTTAGTTAAGCCCGGATTCCCGATGATTCACTCCTTACCTGCGCTCTGCACAGACCCGCGAGATTCCAGCTAGATCTCTAGGGTCGCAAATTCATCTCACATGATAAAGTATTCTATCAGCCTTGAGAGGTGCAAGTTTTAGTCTAGCGAGCGGAGCGGGCTCTGGCAATCTTTGAAGTTTCGCGTTACCATACAACGCGGTCGGGAGACAGGAGGTAGGGATGCGCCCTACTCGTCCCTCTGGTCATTCATTAATAACATAGACGTGTTTCTAGACAGGTCAAAGTAATTGACAACTTTCAAAGATAATCCACTGACACTTAATTCGTAGGCTTCACTGAGTTGGTATATCAACAGACAAGGGTATAGACCGTTAAGCCGCGGCCCGTTAAAGCCAATGATATCCCTCTCGTATCGTACTAGACCGCTAAGCTGACCCAGAATAGACATGTCTGTAAACCCTCTGGTAGGGGTAGCACTCGCATTTCCGTAATGGGCCTCTTGCGGTCCACTGAGGACCGCAGTCCTCGGTGTTGTTAATGCAGGTACTTTCAAGTCTAAAGTACGAGACAACACGTTGTGATAATATACCCTACTTTAACGTCTGGAACAAATGCTTAGGCGGTGACCTCGGACCCGGTCCTCTAGTGACTGCAGCGATTATTTCAAGCACCGTATTCTAACACCGAGGAGCAAGCTGAACATCGTAATAGGCCCAAGGGCGGGGCCCCGAAAGCTTTTAGTAGGATCTTGGTGGGGGCCGATCCTCACACCCTATAGCCTCCGGTAGCTAGCATCTGGATTTCGAAGATACCAATCTTCCTGAAAACTCAACGAAAACTGTAACGGTGGACCATGGCATCCGTACACGAAGGGCCCGTGCTTCCCGTGATGCTTTTGATGGCCAGTTACAGCTCTTTCCGGGCGAATTGTTACATGGCCACTACATCAACCCGCACTCAACGTCCCCATCACTCCCCGACCGGTTAAGCAGGTCTCCTAACTTTAAATGGACGGGCCCACATGCTTGAAAGGCGCTGTATGTTTGGCGTATCGTAGAGCAGAACTCTATGGAAAATAACGTGCCTTCACCTGAGGAGATCCAAAGGTACTACAACATAACCGAAAAGAATGGCAGGCAATTTCTCCTTCTCACGCATTATTAAGCCCAGGCTTGCTTACCATGGCGGTAGGGTCGTTTTTTGAATAAAGCGACCTACTGCCAAGTCTCTATGTCCACGCTGCTTATCGTGTCACTCCTAGTCGGGCACCTCACCCCTGTGATCCTAGTTAGCCAAATGCCAGGCCATTCTCTTGTCTTTGACTAGTGTGTGATATCACGTTATGAGTAAGGGATTATCGTAACTAATACCGTAACTATACAGTCAACAATGGGCTAGGCCATCCTGCCACCCCTTCATTCAAATTACTCCTTCTCGGCAAGCCTATTAGTGACACCAAGTTTTGCCTGCTCTTATCGGGACCGTCACTTCAGCAGTTGCTACGTGGGCGCTCCTCGATCAAGTTCAATTAAATGTTGGATTGGGTCCCTGCTGTTGGAATTTATTGGCACTCATTCGCAACCGTACTGATGGACTCGCCTCTTATACTAAATCCGGGCTGGGAATGCTCGGCACTCACTAGAGGTATTATACGGGACCGGGGCCCCACATTGTAGGGTATTCGCTTCGAGTTTTTCTCGTCGAGAACTAGACACTCCTAGCGATGTGACTTTAAATTGGACGGACAGAGACGTGACAGAAAAGGGTTCCTGAGACACTACGCAATAGAACCGTGATGTTTTCAACAGCTATCGTCGTTCCTGGGTCAGGCTCCCCGCGTCACCGGCTGATACCGTTACCTATCCTTCATTAGGCCAGGCATCGGATTTGGTGCAGAGCTGGGATCACTGCACCTCAAGTTTGAGTATACATTGTTTAGGAGTGCTTCCCGGTAATTACGTCAGAGGTAGGGCGAGAGGTAGCCAAATCTCCTATCTCTGGTTGAGGTTCTAGATAATGCCAGGTATACAAGAAGGAAAATGCCGCTTGCTCATGATATCGCTAAGGCGTAATGGTTCCCACTGTGACTATTTAATATCTGGGCTTGTGACACGATGACATATATGCCATGGGGAAATTAGGTATGAACCGCCAGCATAAGACATCACCGTTGCACCACGGCTTGAGAAATTCCTTACACAGATCCAGAGTGTAAAGGTGGCTGGAAAGCGGTCAAGAATGCGACACGACCTGGCTTGTAGTTTACATAGGTGTTCCACTGGTATGTTAGTTGCGTGACGGGTAAATAATCCGGTTGGATCGCAGATACTGCGTGGTGACTTTCCTGTTCGCCTGGTGATCTAGTGTGATTCATTCAAAAGCCTCGCTGGCGGCCTTGTCCTCGGTTCTGCTTTCTATCCGCACGTCTGTAATATTCGATTGCGCACTATGGAGCCCGCTCAGGAAAACAACTGGGCGTTCCCTGCGCCAGCGGCGATCGAAACCAGTCGACGTTCCCTTACCCGACCGGGCCCTACGTTATAGGCCGTAGATTCCACCGAAAGACCTGATATCCTGACCCCATTGGGACTATTGCGCCGGCCACATAGCTTGGGTAAGAGCTGTGCGACGTCTGATTAGATTCGAGTCCAAACGGGCACCCGGGAAGGCAAGCGTCGAAGCGGCCGTATTCCGTCGAATGAAACATTAGCACCGGGGGACGGATATTCTTGAGTTGCCGATGTAATCCAATGAAGGCCATTGCCAAATGTTAGGAGTATGCGAGGGCCTGGTTAAGAGGCCTTTTCCACCCGTGCCGGCTGTAAATACACGCAATTCAGGAGCGCCCAGTGCTCAGTACCCGCCGCAAACTATCGCAATCACCGTTGGCAGCCTACCGGTTGTCATGAAGACGAGGAGTTCCCTCGTTTTTTAGCGTACGCCGCATGAGCTCCATCAAGCCCAATGTGTTGGCCAAGTTTCCGCGGAACAGCGGTGGCGGTTGCAAAGGAGCGATGGTCCAACATAACATATGACTAATACTAGGGCGCAGTAGTTGCGAGTTTGGTTCACCGTGGTCATGGTATACGTAGCTACGTTGTGTGTCGGTACCGGGTATTCTTAGAGTGGTTCTGCCCTGGGGAAATTCTATATGTCAACGCCCATTATGCTCATGTTACGGAGTATATCCTATCTTGCGCGGGATAACTAACGATGAGGGGCGGTCTATTATTGGCGAACCTTAACACGCTTTCAGCGAACCCGGGCCCGATAAAGATTCAACCAGCTCTGAAGACGTTGACAAATGGCTTATGATAGTTGAATCCTAGGAATAGAAATGGCTTTAAAGCTTGTCATATTCTCCACGATCACCATCCGTCATCAAAAGATCCGCGACATCCCACCAGTAGTGCATCATAACTCCATTGGCGCCGTGCTCTCGCGTGAGCTCCGAACTCCGTTACTTTTTCGTTGAGATTATTAGGATTAGCCGGCGCAAGAACCTGATTGTCATGGACAAGAATCTCCATAGTCTTAGCCACGGCACAAGGGTCACTTCTGAGTGAAGGACGACCTAGAAAAACAGAACCGTCCAGGGGAGCAAACTTTTCTTGGCTCATACACAGCTGGCTCCCCCGGCAAAACTAGATTATCCCCGTAGGCGCCGGAGACCTGCCTACCAAATTAAGAGGAACATGGCATGTTCTCGCTTTCGCCGCCATATTTCCACAGTTTGGACTTTCAAACATGGCAGATTGGTGGTTGGACCCCACAGAGATGCGGGATGTATCTTTAATGGACGCTGCGGGTAGATCCCGTTGCTAAATGGAACCCGAGGGCTAATAACCTTTGTGATTCTATGGCTGAACGTGGGCGTCCGGCCGCAATAGGCGGGCTGAGTAATCTGCCCTATCCTGGGTTAAGCATTTTAAGGCGGCCGCAAGAGCATATATTACACTTATTTGCCACCGAATGCTAGCGGAAGCGCTTAATTTCGTGATTCTATTAATCTCTTTCGCATGCGAATGGTCATATTCCTGACTTCACGTGGCTTGGAAGAACCGGTAGATCCGCGCCGGACGCGGTCGGGTGCGCGTAGCTAGGTCCACCACTCGAATTCCGGATCTGTTTTTTCCCAGGATTACGCCGAGGGGGCTAGCTAACTCCGAAGTCGCAGAGTGTATTACAATAATCAAGGCCCTGGGTTCAGCTTGTCGGCCGACATCAACAGGCGTCGATCGATCAGACAGCGATATCATCCGTACCGCATATTAACTCCTCTTTGAGGCGCATAAGTGGTCAATCCTTGCCGGGAGAAATCTTGACGTCCGTGAACAGTTATCTATCAGACTCTGTATCTGATCCAGGATGCTTGATGTACAGAAGCACTCCCTGCTCAAGTAAAGCTAGGCCGCGACCTGCGGTTCGTAATCGAGGCAAGAATCTCGTTGGACCGTTCTTTCCTCCACCTCTCATCTATGCCTCAAATAACGGCTAGGGGGAGACCCTGACTACGCCTGCATGAATCATGTTCCAAGATTATTTTACTGTCCTCCGATGCAAGCAAGGCGAACGGTAGACTGTTGGGGTTGTGCGGCCGTTTGCACGCTTGGCGTCGGTACCTCGCGTGAAAGAGTGTAGACACTGTGATTTTTCTTAAGAAGGGACGCTGGAGCGGACCCGTCAGAAGATTGGGAAGCTCACACTAATTGCGGCTCAGCGGATTCCTTTGTGCAGCTTACCCATCCAACTAGCAGTCGCGAACGGAACACTAATGGGTAGTAGAAATGCGGTGCATCGCGGCGAGAATAGAACGAAGGTAATACTCCCATCACCGGTCTTTAAGTCTCCCAAAGCTGGCCAATTCTTAATCAGGAGTGATTGACCTTCGAGCCTTAGGTCTTAAAACGTCCAACATGTCCCGTTTGGTCCGGTGAGTGATTCCATCCCTCCCCCTGCTCGGGCTTTACTGCCCAGATTAACCTGCGAGTGCCAGCGAGTCTCCGAATTTTCTTAGCAATGGAGCTTTAGCAGTCGGGATGAACAGTTTCTATCAAACTTGGGGGACTCCCGGCCCCAACGCAGAGCGACCGCGCACAACTATGAAAGATCGAAGGTGTAGTGTTGCAGTCCATTTCGGCCATGAGACACTACGTAGACTATCAGTAGTAAGTGACTGTGCGGGCATAATAAGTCACTGACGGACCAAATCTTCTCTCATTATCGTTCTTTGTTATAACGGCTGTCACGTTCGTTGCTTTAGCCTCTCGGATCCTTTGACAATGCGAGAGTCACTCTAATTGCTAACTCAGGTCCACACGGGAAGCTGGGTGAGTCGACGCCAGTGAACCT +3 TTGACGTTTCGCATATCTCAGTTTAAGTGACCTCCTAGGGTCAGAGTTTCTCTGGGCGATGCAAATTTATTAGTAACAGACGTAATACAGCGGGTTTGAGCTGCTCGCCGCATTGTACACTGTGTTTGTCACACAAGCAGGGGGGGAACCTATCGTAGACCGCACGTAGATAAAAACCCAAATTGGAGAGATTCTTATCTGAGTCCCTGCTGACCTTTTTGTATATTTACAGAAGTAACTTCTTATTTTGATGAAGCGCATGTTAAGAACGTATAAAGCGGCATCCTGAGGACGTATGCTGATGACTGCACTATAGACAGTCCTAGGTATCACCTGCCCACGGAACAGTGTCAAACAAATCGGATGCTCTTTGTTTCACAAGTGTCTGTTGATCACGCGAATGGCACTTAGAACGAGAGCGGGGAGTGGCGTATCGCACTTAACCAGTCCACCCCGCCGAGCGGCCCTAAATAAGGGGGTTAGCGTAATATGTATCCCTAAGTAGTCTTCATTTGACGGCTATAGAATACTGGTTTGATTACCTGATTGTACCTTGGGGGAGCGGCGTGACTGACTGAATCTATCGGCGTCGTGATGACCGGCCACGATTGCATCTTGGAAGTGGTTAACGGTTCACTTATTGCGATGCAAGTCGCCCTCATAGGACTAGGGTGCCTTCTGCCCAAAAGCGGCAAAACTGGCGTCATTAAATCACGGACCTATCCTAGAGGAGAAACACGACATCCACGCTAGATCGGGAGCCCGCTGCTGCCCCCGCACAGTCGAATATGCCCGTCGTAATAAGTTATCACTCCCTGAACAGGCACTCCCCGTCGGGATCCTAAATGCCGGCGCATTATTGTGCCTAATTCGAGTCTTGCGCGTCTAAGGCGCATTTCCCTTCTGATTGATCGAATCCTTTCCGCCCGGTACCATCCTTGTATAGGCTCCAACGCCGAGGCAACCCTTCTGAACGATACTAAAGAATCAAGATAGGCTCTACGACTTTATTCGTGCACCACCGACCGAATGGTGAAACCGTCAGAATGTACGCTGGAACGAAAAGTTCACGACGTCACTACCGAAGTTTACCCTTACTAACGAACCAGTGGGGAATTAGCTTACCGTCCAGCGGACGTCGTGAAGCGGGCACTCAAAAATACCACCCCGAAGTGACTAACATGGACAGCATGATCAGAATGGACGAACAAGAGCGATTCACTGAGCGCTGTGCCCGGTAGGACGAGTCTTTAGTCGTGCGTTCTGAAATAAGGTGTAATTAGACGTAGCACAACTGGTTATTTAACCCCGTACGCCAACGACGACTAACCTGACCGGCGGGTCCGTCTGCCTTCTATTTTGCTCATGTACATCGTCAAGTTAGATACAATGTACTAAGTCATCTTAAAACTTAGAAGCCGAACTCATTGTGCTGACCTGAAGACGTTTGGACTTCACTTAACAGCACCTGGGCCCGGAGAGGTCAATGCGTTCTTAGAGAACTACACTTTCCAGCGGTATCCCTGTTACCCTAGCTGGATACTGAAATCTATGTACTTCTAAGAGAATTAGAGTATGATGAACGGGCCTGGTGTATCAGGTGATGACACGTGACAGCAAAGTGTAAGCGTTTATGTTTGATTCAATCCAGCGATAGACAAGAAGGAATCCGAACCTTTGCGTGAACCCACTTTCAAATGACAGATCATTACTTAAGAGATGAGGTGTTGGACGGATAATTGGAGTTGGGAACACATCCTACTTAGCATCAGCTCCTAAAAGATGCACGGTATAAGTTACCAGTTGGTGCCGCCGATCTTCTGAAATGGAGCGCCCTTATGAAGTAAGGATACTTTGTAGACTAAGTGCCTTGTCGGTTACTGACCGCGACAGATTCTTCTTGGTGTCGACAGCGAAAGCTGACCTTAACTGTCGACTGTAACGAGATGGGAGATTCCAAGCCCCGAGTGCGGGAAGCACTCACTGTCGCTAACCCGATCAATAATTTGACGAGTACTAAAATGGGAGGGTGCCGCCGCGTTACCTATACTTCCGAGACACAGCCTCCGAACTTAATAGCTCGGGTAGGTATTCATTGGTTGCATCCGTAGTAGTCTGGCCCTCGTGACGCCCAGAATTCTCACGTGGAGGTAACTTGGAAACGGTGACTCCGAAGCATAGTAATGCCTGAGGGATTCTAGAAAGTGGGGCGCGCGTGGATGGCTAATAAGATAACTACGAGAGTACCTTCCCAGGCGCCTGATGCCTACGGAAGTGATGATCTTGACTCACCCGTGTATAGTACCGCCATTCGCCACCTATCAGACCCCTCGCCTCGTTTGCATACAAGTACAAAGCCCAATGGGGCCGGTACATGGTCCCTGCTGGAGACACGGCCCAGGACCGCTATCTTGAATAAGTTAGAATCGCAAGATTAGGAACGAAGGCGTTACTCTATAAGAGATCATTCCTCGGAGAGAGTTAAAGTTACCGATCCGTCCAGACTTCGGTCACTTAGTACAGATCCTGGCGCATCGTGCAGACAGGCTGGCTGTTACATCACGCTGACAGCGTTCCGCAGAGATGGGTGGCCCGCTAACTGCTAAGAAGCGGCCAAGGGCACCCCGCTGCCCTATGGACAACGGTAACGAATTGAGGGCTATTTGTCTGCGTTGACACTGGTCGCCCCTTCGGTCCAAGAAGAGCAATTTATCATGCTATCACATATGGACCTGAGGACATTGCGCCTTTGAGCATTGTAACTCACGCAATCAACAAATCCTTTAGCGGTATCGACCTACTGTGCCTTCTCCAGCTTCTGAGCCCTGGATAAATAATCGACGTATGCGGCAACCAATCGGGGTGTCTATGGTCAAAGCTCATGCATAAGCACGAAGATAAGTGGCAATAAGGAGTCAGCCAACACGTTCGTCAATTTGCGCGGATTGACCTCCAATACCGCGACTCGTGGAACTAGTGACCTAGACTTCGAAATCTTACTCTCACGCGGCCCTCCTGAGACCTTCTCGATACGTATCAGGAAATATGAGTTCTCGACCATTTACAGTTGTAGGTCTATTCTCGTACTCTGCCCCGCTGGTTCTAGTCGGAATAGAGAGACGTGCGCGAGCCCTACCGCGCCTCAGCGTCCACTCGGAAGCTTCCAGGAACAAGCCAGCCTTTCTTCATGAGAGGAATCCGATAGGTACATTCCCAGGTACGCTTTTCTATCAAAGAACGCCTTGGACATACTGTGCGCGATCCATCTCACACATTGAGCTCGTGGGGGGCAACTCCCTGCTAGTATACACTCCCGACACTCGTGCATGTGTCACGTAAGCGGGTTTGGGACTGTGATGGACCCTCGTAGCTTCAGACCGAGAGCTTCGGGCAGGCTATTGTACGCTCTCGTCCTGATAGCTCTCAATGGCTCAAGTCCCGACTCCCCGCCGCGGGTCGGGTGCGATTCCGTGGAAACAACGGCTAAGGCGTCCGGTGATGACCGACCAGAACAGGCCCGCTGATATCCGAATTTACTCCCGATTTGTTCCGTCTGATAGTCAGGGCGGCATGAGATTCGGTCGGTATTCAGGAAAACCAAGCCTACGATATTCGGTTCCATAGATCTCAACGCCATTCTGTACTTGAGGCATGTCGACGAACGTTATACGGTTAGTGGACTCTGCCGCCAAAGTTACAAGAGCCTGCTAAGTAGGATATCGCCTTAAACCTTCGATACATCCAAGCCTGGTAGTCGAAGGAGTACTAGTGTTCTTGGTTAATTTGCGATTCCGAAGGACCGTTAGAATTATCGTCGTTATTTCGTCGGACAGTTGGACTTTCACACTTCTACAGCACTGCTCGCCCCCCATTTGTACGCGCCGAATTTTAGTAGTGTAGGCCCGTCTGGCTTATTTCGTTTGGTAACTCGACAAGGACAGTGACCACTAACTCTATAGTGCGCTTATCCCCGCGACGATCGGAACTAAATAGGTTCGCGACCATTTATTTGCGGTGTCACAGGAGCAAGGTCAGATGAATGTATGATATGACTAGGCGGTTGGACACGTCGGAGTCTTGCAACGAGCGGCTCGACCGCGACCTTATCTGACCCCTAATTATGTAGTAGTGCCCGATGAAATAGCGATATGCTTGCCGCGATTCACTTCGGGAGAGGCTACGTCCTTCAAACCACGCGTCAGGCGCACGTACGAAACCCCTAGCTCCTTCCCCTTAATAAGGTAGGGCCTATTTGAGTGGACTGTGACCAGGGCCCAAACAAGCATGGATGGCTCCTATTTGCTATCGTGTAGAACTACCTGACTAAAAGTCCCGATGATTGTACCGATTTTGGAGTAGTCGGCAAGAGACTCCTAGCTCCAAAGGTTTCACTCGCGCGCGAGTCACACTTACGCTTGAATGCGTGGAAGTTGTTAGAACCCTCGTCCGGGATCAAGCGCCATATGAGGCCGTTGTCGAACTAGGTTGGCTTTAGATGAAACCCGGTAACCTGGTGGATGGTGCCAGTACTACAGCGCTGACAAGGTTAATGCATCAATGCCCTGTTCTTTGAGACCTTTCCCACAGGGCTCTATCCTCTTAGACCGGGCAATATCACGCCCAAGGGGTTGGCTGGCGACACGTTAGTAAGGTATCTTCGGTTACTTGTAGTTCAAATTGATCCGTCCGTCTGTACCAAAACAACGAAATGCTCAGTACTTACACATTGCCGCGTGTTAATCTACCCGGAATAGACACGCGGGTTCAATTCTTGCGTGACAGTATATTTTGAGCACCTTCTACGATCATGGGTAATCTTGGGAGTATCGCATTACCGCTGAACTCTATTGGAAGGGAGGCCGTTGCTATGTGTGGGCCAGGCCGCGCATGTCTGCATTAGAAAACGGTAAAGCGAGCAGACTTAGGATTATGTATTGTAGTACCTACAAAAGCTGTGCACGGAACGGTCTTATGATTGTTGGATCGATGTGATTAGCCAACAAACGTGTGCATATCAGGCTAGTCCCAGAGCCGATAAGACTGACCATAAGAGAGTATTTGCGTCCAAGCTTGGCAAAAGGATGCATCTAGGTTTGGTTGCAACCGACCCCGGAACGGGCCTGCGGCCCACGCGTCGCATGGAAGAGGTGTGATGGATCCAGCCGACCTGTCGATGCCTTGAGCCCATAAGGATAGTTGGTTCACAAAGTGATAGGCTTCCCGACATAAAGATATTATACCACACAACAGTCGTGATGCACCCGATAAGCTCGACTCCCGGTGTCGCGTCTCATTAATTTCGGAGTGGAATTAACTCGCGACGGATTAGCAGCCCGGTCGCGCCACTCTCGAGTATTAGGACTGCAGATAACGGCGCAGAAGGGTTAGTTCCGATCTGCGTATAGCCTGATGTGCCCCATATATGATTGTAGGAGTCGATCGACCAGGTGACGTTCATAGCATCGGCAGATTGGTATCCCCTTACTTTCTAACGTTGTGAAATGTCCCATAACCGATATCGGCCATCCCTTCGTATAGTACAATTGGTGTTCCGTTAATCTTCGGACTAATTGTTTTCTGAACTGGCCGTCAAGGCGGAAGGATTGAACCGAGACACAGCCACCCAAAGCAGATGTCCATGGGGCCTGCCTCCTATAAGAGATATGCAAATGTTCGCAGCACGCGCGGCTTTTTCGGGCATGCATTACGCATAAGTCCCGAATTTATATTGGGCCGTGTCGGGAGCAGAGACACGCTACACTAGGCACGCTCAGGATTAGTGAGGGAACCAGAATTTCGTGGCCTTTGCAGAGCGGACAACCCCTGTTCTCTCCAGCAGACGCGATCCGGTCTTTAAGCGTTCGTAGGATACCGTCATATGTCCTCTCTGGCTACTGCGGGAAGACGAATTTTTCGCCATGATCTTGTTCTGAGATCTAATAAATGGGCATCAGTGTTGCAGTTCAATCGTTTCTATTCGAGGCCCCGTCGGGTTCTATTGTCTGTCCTATCAATTCGCCGTTATAGATAGCTTACTAGGGACCGGAACGTCACAACTCCTGTTCTCCAGAAGTAGCGCCGCCTGGCAGGTCAAGTAGTTCCCTTCTTGATTGTGGTCGGTAAATGGGGCCTCAATCAGATCACCTGTTACATGTGCGTACCTGGGTGCATTGAATGATATTTAATCTTGTACTAACTCCCGTTGGGAAGGAGTAGGACACTTAGCAATCTTCACTGCAGGCACGGTATAATTTCATACATGACGGCTAGCTAGTCCCCGCTCTCTACAGAGGTACTCTGAGTAATCGCGGCCATACAGTTATAAGCACGCGGTAAGTAAATCTTCCCTCCTACCCGATTCGTTCCTAGCAACCAGCAGTGTCGGTGCCCGATTTTACAAGGGGTAGCAGATACACCCCGCGTCCGTGCTAATTAGACGCATGCAAGTCCCAGCAGCTCACGGCCTACCTTAACCTCTCTGCGAACGTTATCTTAAAATTAACTGTCGTTTTGTGAGTTCGCAGAACCACATGATGTTTGCCAAACTTATCATACCCTATCATGAGTACTGCATATATCTTAAGTTAGAATGAGGATTCGCAATTAATTCCAGCGATGATCACCCGTGAAGGAAACTTTCTTTACCGGGCACACGATATCCAGACGACTTAGCAGGGTCACTTGAAAGAACGGGCTGATCAAGCTATCGGTATGGATTAATGGATGTGTGCGTAACTTCTTACTACTTATGGAGTGGGAGTATTAATATGTTATCTTGCCGCCGGGCGATCGGCAGCTGCGCCGCACCAACCTCCGCATTGCTGTGTCAGTTGTAAAGATACTAACACGCTACGTTAAACCTATCTTACCGTATAGCCTATCGCGTTCGGGACATGCCCTGTCTCGGATTGCATAAAGTTATGCCACAGAGATCCGGTGCGTTATTACGATAGAGCAATCAGACCAGGAGCGGGTGCGCCCATGTTACACATGTTCCCTTCCGGTGATCCAAACTGACACGTTCAGCAACGACCATAGAGTCGAAGATATCCGTATACCCTACAGTTCGGATGTGATAGTGTTAGGATGACTGGGAAAGCGGTCATGGACGGTGAGGCACCCAAAGATCACTCCTGTGGCGGGGATGTTGTTAAGTCGTAACGCCCTATCTCGGTCAACCCTTAGACAACATCCTGGGGCTCCAACGCTGACGGCAGCTCTCCCGCTCACCCCAAGTCCAGTGCGTACTAATCACAGCCTTACTAAGCTTCAAGGGCCCCGTGATTCGGGGCGATCCGTAACCGCCACTGGCAACTCAGGGCTTGACGGCGGAACGGCTCCCCTAGAGGTCGATGGAAAAGCGGGGTACCAATAACCAGATGCCTCCGATCGCGGACCGTCACGGCCATAGGCAAAAGCTAGGAAAGGAGCAGATCCCTGGAACGGGCCAAATATGATAGGGGAACGTAGGAGATTGTGTGGGCTGATGTCTAGCATTGGCGCAATCCGATACCCGAACGTTATCTGACTCCTGTCGGCGTTCACAGGTGTGCAAGACCGACCTCAAGTCCCGCATAACTCAGGGTGGGCGCCATAGGCGTCACTCACGCAACTGTAGGGGCCCGTATCCTCGGGTCGTGATTTGGCTGGCCAACTCTGAAGATGGCCGATCCACACGTATTAATAATAGTAAGGCATACTTTTTAAAAGCGTTGCGCACTTTGTCTGTTGCGGAACAAGAGATGTCGTGTGCAGATTATGACTATAAGCCATGATATCGATCCAAATGCTGTATTTAATTGGACGCATCATGCTTAGCCCTGTTTATTTCTATATAATGTCACGCGCCCTCTCCCGACCCATCAATGGACTTCATGCCTTGTATAAAGGACGGCCATCTCTATATGTCCCAGGCCATTTTGGAGCCCTTTCAGCGCCCCCGGCTTTAGGCAAGTGGTTCTGACAATCGGATGTGTCCAAAAACAGTATTCTGTAATACTCTCACTCAATAAGCAATACCCATCACCGTTTAGTTTCAGCGCGCTCTCAGGTGAAAGGACCGCCGGAAAGTGTATCTTAATGATACCTAGACTGAGCAGCCAGCTTCGTTATTTTGCAAAGGACAACAAAATTGCACATCGGCCTACGGGAGACAAGGTGTTAATGGAATAGGGGTACGGAATTCTCGCGGGGACAACCGTCAATCAAAATGCGACAGTGCCGTCTCAGACATGGGGGATACAATGCAGTTGGTTAATGCCAAGTATCCCTTCTATGGCTTCCCTGTCTTAACCCGTTCATACTATAAGGAGACGATGTTTTTATGACATAATGAAGATGTGTATTCCCGTGCCGCAAAAGTTCAAGGTCGCTAATAATGGCGGATATTCTTAACCGGACCCAATCATGCAAGTGCGGTCCGCTGACTCCGGATTACTCCTTACGGGGGGCCTAAAATATACAGAGCCGCGTACGGTTGTTTGGAAGCTGAACTTACTCGGTGTCTTATGACTGCAACTCGAAGTAAGCTTCTAATAGACCGGTCGAGTAATGTCTAGTCTCGTACGGTAATCTCAAATAGGTTGCCAAAAAAGAAATGTTAACTACTGCGTTCGAATCGATGATCTCGATTGTGACTCTACCAAGGGTTCTTGTATGATGCTTTGTATAGCCGATGACAGATTCCTCACGTATGCGTAAGTGGAACAGTATTTGGCGTACGCGGGCGAGCGGTAAGGGGCTCGTGGACGAGCGAATAATTTTCTATATGGGAACTCTCCTTGCATGCCGGTCCAATAGGGCGAACCCGACAGATTCTGTGGCAAATTGTGGTATAAAGTTCCAGCTAAGGGTTCCCCGGGGCCGTATCGTAAGAATAGGTGTGCACAGAGTCCAAGGCCCCCTGCCCGTTATCGTATGATCCATTAGCTCTGGGCACAAGTTGGCTATCATCTGGAACACGCGTAAAGTGGACGTTCAGACATTCGTACGTCTCAATGCGTCAATGGTAGAGACCCGGCGTGCAGAGGTGTCATACGCCATGCATCGGCGAGGCGGATCGGCGTTAAGGGGTGCGTCGGAGACGCGCAAAACTGATTGGACTTTAAGGACGTTCACCGCCCCGCTATTCCGCGAACACTGTTCGTCGCGATCTGCTTAAGTTGCGGAGAAGAGGAAGTAACACAGAGTAACGTACATCGCTCGGATTCAGGGACGGTATTTGATTAAAAGGGCGGTATGCGCATAGTTCAAAAAGTTGATAGCTACGTTGCATCGTCGACCATTTGCAAGACGTACGCTTTCCATAAGCTGGGGTTTGGTAGGATTCTCGTTGTACGGCTCCCCGACTATGGATAGGGGGAAAGAATTCGGCTCCTCTGTGCCTCTTACACTTAGGACCAGGCATCTCCTACCGCCGAACATTTTGACGCAGTATACCGGTGCGCTACGGCGAGAAGACAACGACGCTTAATAACGGCTGCCCGCTCTTTTGTAATTGGGAGGGGATCATCGTCCTAGTAACGAGTCAGGTCGACGTTAAAACAAGGCCGCTAATATGTAGGCGAGTACCAAATGTGCGGTAGAATGAAGCCAGATCAACTTCTGCCTGTGTTTTCATTCCAATGTTTTACCGAGGGTCCGAGAACGTATACCTGAGTCGCTAGTAATACGGCTCTTCCATTATTTCTGGTCGGTACCGATCGCGCCTGGGCTGGTCGTAGCGCCAGATTTAACACTGTGAGCGCGCCCTCGTACATCTAAACGAAAGCCGCCCAGGACCATTTGGGACATGAGGCAGTCCTTGGACAGTAAGTACCTTCCTGCTGGCTGACTATCCTTCTCATCTTCCATACTAACTCTACGGGGCCCAAATATCTGTTAACGAACTACTGTGAGCCTGACGACTCCTGTTAGTATGATTGCACGTGCTAGGTGCAAGCGAACCTACCGCTCTGCGAGACTCAGGCTAGAGTGTGCGTGAGTCTTCAGGCAGGTATCA +4 ATCCATGGGATCAAGCAAGTGATATCCCCCGTATCATTCATCCAGGCTTCCGTTTTAGACCTTCGATGCTCCCTAGGATGGTATAACGCTAAGTGCACGCCAATACTTGTGTCCTAGCGCTCGGACATTGTGGGTACTCTAATCAACATCTAACAAAGGGCCTCAGATGCGCCCTGTTTCCGTACCGACTCAGCGTGTCGATGTGGCAACCTCTACGGGTTGCCATCCTTGGAACCCCAACGCACGGTAATAGTTGATGCGAACCCCACGGTTCGCACAAGAAGAGCATGATCTCCAGCACCGCATCTAGGTCTAGATAGCCCCTTCCTTTTAGTGAACCGTACCCACCGACGAAGTCCCGTTCCCTATTAACGCAAAGGAGCTCGCAGTTATAGCTAAGGTCCGTAGCACGAGCCGTGATGCAGCTTGTCAGCCATAGGAGGAGGGGCAGCAGGTAGTAGTAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCATCGTCACATTGACCAGGACTAGATCCGCCCGGATGGCAACTAACGAGACTCAAGCTGATTGCACGGTCCTGGTCAGCATTCACTGTACCTCCAATTTCGCGACACAGGAGTACGAATCCCAACTGCGGTACACGTGCGTATTGCAGCCAACGTAAGCCGGATATCTTGCAGCACATATGGTGCGATTTTCCCCGAATGAATGTTGGTATAAACCCCAAACTTCGGGCGGATGAGCTCATCGGACGTTCAACGATCTTTGCGACAAAATCACGCGGACATGGGTATTATGTGGCCCTCGGTCCGGCTATTTCTGCTACTTAAAGCTATCCCGTACCCTCTTTTGAAGGTGACAAATGCGGCGGGATAGGGCCTTACACGAGACCGAACCCCTAATGCGTACATTAATGTCAGATTCGCTAGCTCCACGATGCTAAGACGGTGGGCACTCAAACTGAGTGCGAAGGTTACTGACACCGGGACCAAGTTCACAGTTCCGCCCCATGTTAGACTGTTCTGTGAACTTTGTCTGGTATTTGTGGAACCGAGGATATCTAAGCCGTTTTTTAATAAAACATCAAGAAAACCGTCCCAGGATGGGCACAGAACGAGTCGAGCGACTTGCCGCCCGTTTACTTGTAATGTATCAAGCCTCATTTCTGCACATCATTTCAAAGTTTATGAGGAAGCCCCAGACGGTCGTGAGTTAGTACCCATTCAGTCTACAAAGTCGAGAGCACATGACTATACCCAAACACGTTTAGTTAAGAACGTTTGTACGACTGTTTGCTGTATATGTGACAACTGTCGCCTCAGGTTGTACACACGTTGCCCACACCATCATGACCTTACCTCCCGCTTTTTGAACGAAGTCGCGCCACCTGTCGCAAGTAGTTGTGCGAGGGAGTTCTATTTCCGGGATGATCATGAGGTAACCATCATGCAGCTAAGAAGTCAACACCGCGGGAGAGACAGAATTGTTATAAGGCGGCGAGGAACCCTTTCGGGGTTTCCCACTGTTATGTCTATGCATAGGATTGCTCGGCACGCCGCGTATTCATAACCGTCCGTGAATAATGTCGAGGATATGTCAGCCACCCCGACCGGAGACGTTACGCTTAATCTAGCTAAAGTCTAGCTCAACCGTAAGTACTCCATGGGTTCTACGTGATTCATTTATTTCAACAGTCTGGCCCTTGAGGGGACAACTCCGATAGTAGGGTATTAAGTCCCCGCCAGTATTTCCCTCTCTTAAGACAACTAGGCGGCCTTTCTCGAGCCATTGATGGGATAATTGTGCTAAAATGTAGCAGATATGTACGTTCATATACACCTCGGCCTCATGTCTGTGGGATACCAAAGAAGAATCTAACGAAGCCTTCGCCTAGGTAATTCAATAATTCTTCTAAATCCCCCTGCTTGGCTATCCTAACCTTACGCATGCGGCTGTAATCGATATGAAATATGATTTGGATGTGTTGGCGCGGACGGGGTCGTGGCGACTACGCCTCTTTGCGCTTCACCCTTGGAATTTGCCAAAATTTGCATATCTGCTTCACCCCTTGCTCCAGGGGCCCAGTCATAGCGTCCGTCGTTGACGGATATCCGAGTGATCTAATCGATGTTTGTGAACGTGAAAGGGTCCGCACCTAGGCCACCCTTCTAGTTTCTCAGCGTCTCAGCGTCTTGCCCCAGGGCGACGAAGGACAAAGTATACTCACGAGGCCCATATGGGTTAAATCTTGAGCATCTGCAAGTGCATATCTTATAAGTTGCTCACTAACTTCCACGTGAGAGTCTAACTAACGAAACCGATGCAGAGCGCTCTCCAGTACTTTCTGCCAGACTTCAGCGGAAGTCGGCAAAGTCACCCAATCCTTACACCGTTCTATCTCCAATGATCCTTTCCGACTTCTCCCCAATCTTTCGTAGGACAGGTTGCCGGACCAGGACAGTGGATTGAGACTCCCAGGGCGATTGTGCCGCGAGCGCCTCACAGCCCAGAGTGCACGAGTGATTATATCATGAGCACTTTATCGTCGGCCGATGTGGGGCCTCGGCTGATCACTACATGAATGGGCTGGCAGCTTGATGGCAGGGTACCATTGAGTCTCCATACCTAAGATTCTGATGTATTGGGGATTGGGATAGAGTAAGCGTCAGGAGACCGCAAGTGACTAAGTTGGATTAGCGTAGGTGCCACTGTTATGGTTACCGACTAGACCTATTTCGCTGAGTCAGGACCAGATATTGGTTCTGTTCCACTCAGGTTCAAAGATAGTGGCCGACACGAGGTTTGGCCCTCAGGCCTAGTATGTGATGTTTCCGTGTAGCGTGACGCTCGGCTTCATCGTACGTGTCCAGCTCGACAATTAGTCAGTCACGGGGAGATATGGGCCAAGCGGCATCGCAGCCAGCCCTAGGAGCGCCGCGTCCTTCAATCACTGAGGGCCCGTTAAGGGGGAAAGGGTACATATGAGAAAGGGGGAACGTAGTCAGGACCTCTCACTCCAGATGGCGGTGCTAGGGGATGAGACGTCTAGATTCAACTTCTTGCATTTCCTAAGTGGCTGTTCGAATGTTTTGGCATAGGAGTGAGAGCTGTGGACCCAGGTATGGTGTCGACCTCCGGCTTACTCCATAACGGAGCTTACTAGTGTCAGACTGGTAGCGATCGTGCATTATATCGCAAGACTATTCCGTTCCGCGCATGCGGAGACAAATTCCTGCTTGAACTAGGCGGACGCCTACTATTCGTAATCACGACGTAAACGCGCTCTGCCAATTGAGCTAATACTTGCGACTCTTACAGCACAAATAGTAGCTTGTGCCGATCGCTTTAGCTATGTCCATGATCACACCGATCTGGCACTGCAGAGCACCGCGAGGCCATCCCCTTCGTGAAAAAATTGATTCTATCGCGTAAATCGTCGTTGGGCGTCGCCGACGATGGCAGCAGTGCAATTGGAGGGCTGAGACCGATAGCGCTAGGAGTGCTCCACCTTTGACGGTCTCTCTCGTGCGGATAACTGTACCAGTTTCTGTGTACTGTTATCGTTAAGCCATCCGGACTGGTGTTTGTGGACTACGCTCCATTGATTCACCAAAGGGCAGGATTCTAACATCGGTTTTACTTAGAATGATCTAATGACTAACGAGATAGTCAATCACCTAGGGCGAGTTATATCAGCCCTATTCAAACTTGTCTATCACATACCCGTTGATCCACGGTGCCGTACGGCTCCACAACGCTCTCTGGCTACTCGAATAAGATTTCCAATATGCCTTGGGAATTCTAGAATATACCGAGCCTTAGTAACTGGAGACCCCACATGGTGACACGTCTAATATCCCTTCTATGTAGAAACCTTAAGAGTAGGTTGCTCTAGGAAGGAAGCTCTGCTGCGAGTGGTGATAGTTAGTGACCCACAGCTAGCTGCTGACGTAGAAACATACCCTACGCTCGCCGTTAACGTAGATGATTATCACATAGCACTCTGCGCGGACACTCAGAATAGGCTACGTAACTCATACTCTGACCCGGCGTCAAAAAATTTGTTTTGCTGTTTACTCGCAAGTCATTTGGCAAGCCCGAACCTTACTAAGCAAGCGTAAATAATGCGTCGGCGCAGTCTTCCTCCATTGCCAGACAACAAAATACCACACTAAACAGCCGTTTCATTATTATTGCTAGTTCACTAAAATGACGGGGGTAGAAAGCTTCCCCCGAAATAGTTGTTCTTCGGAATTCAAGGCAGCGTAACAGTCCACTGGAGCTCTGGCATCCCGTCACGCCTGTCTAAGCCCTATTTAGTTACGACCGGTTCACACTTAATGCAGGCAGCGGTCCTCGTCACGAGATGGTTGGCATCAGCCATTTTATGCGCCCTTAGTACGTATTATGTTTGGCTACCGTCTCCGATGATGGTAGCTCAGGCCGTGGAAAGGTGAAATTTTCGGGCTGCCCGGTTGAAATTACCCGTTCGCTTCGTTCGGATGACTCGCTCCCAAGATCAATGTCGTGGGAGGGGAATAGTGTTCCAAGTGATTGCCCACGCAAGTGACCCCGATTTTCAGTATGTAACAAGGTTGGCGGTTGCACGGGTGAACTTCCAACTCACCACCCGCTGGACGGCTCTAGCTTGTGCATTGCCTTAACGAGGACGTCCAGGCCCTGCCGATTCTAAGAAATGTCAACCAGTTTCATGGTCATGCTGTCGGAAGGGATTGACGCGCGAGCATCTCGTAGGTGAGACAGCTTTATGCGGCGACCAAATTGGTAGATAACCAGGCATTATAGGTGTAGCGGGCCCGTTTCAACACAAACTGACACAGAGGTCTCCATCCTGAGGCCCTGTGATGGTCAACCCTCAACCGTATGACTTGTGTGCTGCAGCCTCCTTCAAAAGTAAAATATAAGAAATGTCGGGGGGGTTATACCGTGTTATGCCGCCCAAATTCGTCGGACTCCGTGTCGTCGTCAGGTGTGCCTTCTTCGTTACGATACGATTAGAGGGAAAGGTTCCCCAACTCGGACGCTACACATGTGGCAAGGGCAACCCTCGTAATACTAACTCAGATTGGATCTTACTGACTTTTTTTCAGGTCAAGGCGTAGATGTGTGATGCACGGATACACGCACTTCTAACGGGGCGACCGATATCACCCACGTCGCGGAAACTCAGCACCCTTCCCGTTTATCTTCAGCTGTAGAATCCGGAGCCGACTCCCTACCACAGAAACGGCATCTAGTTACGGTTTGCGAGCTCGGTCCTTGTTCTGTGTATCACTTACGCGCGTGCAACGGCCGAATGTACATGCAAGGTGAAAGCCGCGACCAAAGCCGACTAAAGGCCAAAGATGTAAAGTCAGCCAGACCAAAGGCAGCAGTACTCATGCGGTGTCAGAAACTACGGTGGACTGGTGGATAATACGACAGGCAACGGCAATCACTCATGATAACGCCCTAAGCGACAGTAGGCATTCTACCGGCTCTGTTCCCGGGTGACCGTGACCATATCATTGATGCGCTGTCTTGTCTGTCACGAGTACCTACATGACTTATGAATTCCTGCGCTCGTGACATCCTCTTCCGTACGGGCGACATAAGAACAACGCAAACTAAAAAAGCCGTCAAGTAACGCTACGCACTGACACAACATGGTGATAGTGAGTTTTCCCTAGCTAACACGACTTCTAGCCTTGAGAGGCGGCGCCTGTACGAAAGTCTATAGCCTTTGCGTCGTACATCATGGGGGACGAGTTTTTATCACTTGAGCTGGTACATGGTTGAAAGACCTCGCTGCCCTTTAACCGCTGATACCACAGTCCCATGTCCACAAAGTAACGAAGCGGAATAATAGATCGTGTTGACATCGCCGAGTGGTAAGGAGACATATGCCCCCGTGTTAGATTGAATGCCTCTCCCTGCTCTCATTGGCTACCTCACTCCTAGGTCCACTTGGATTAGCCCCTGTTCGCCCGATCATAACTTTTGGGTTATTCATTTATTCTATCAAGGTATCACCCAGTCGATGCTGGCCTGCCTTCCGTTGCTTCTTGTAATGTCTTACTCTTGCTTCCGAATAGTCGCATTCGGCATTCCACTTGTCCTTTGAATGCACCGTCTCAATTGCATCCACTTATACTTCAGTGCTCCTTTACTGCGCGGTAGTCGAATTGACACTGTTCAGTTTAATAAACCATGCGAGAGCTCTATTAAAATAGACTTCTGGTATGAAGGGTATCGAGCAAACAAGTATACAATTCTACCATAGGATGTTCATGAGAACGTTGCAGGTGCTGCCAAGTCCGCTCCCGTTGTGTTGATACCTCTTCTTCACCTTTATTGGCATGGTATCCTAACGTTGGATGAAAGGACTCCAGGAGTCTATCCTATTTACAATTCAAAGAAGAATTCAGTTTGACGTATCCTCGTGAGACGTCCTGCATCGGGTGAAACGGTTTCACGCATTCGAAAGAGAAAGGCAGCGGTCTTGGGCGAACTGTAACGTCCAATAAGGCTAATTCTCGCTCTGCCTTTTATTACACTTGCTACTAAGCGCCGGTAGGCAGATCTATGAATAGTTATCCCAGACATAGACGTTCGCATGCGGGCAGTTGGCTCAAAAGAGGTGGTACCCTGGACTACAGGCAACCTATTATCTCTATTTTGGTCGAAGGTGATCCACGACGAAGGGTCAGTCGCCTTCTTTAATAGCTGATCGGCTTGGACTGGATGTGAGTACACCCATGCCTAGCGTTTTGCGACGCTTGCCGCCCGGTAAGAGATATGACCAAACAGTCGCCAACGAGCAACGGTGAGGGCAATCACTCAGTGGGGCAAATGCAGTGTGGACCGCACATGCCTGGCTCTCTAAGAACGATTTAGACCGAAGAGCGCAGGTCAAAGTAGATTGTACTCGGAACTAAGGTTCTCGGGTTGAAAATAGTAACTCCCTGACAAAAACTTGAATTTTGCTAGTCATTGGCGGCCCAAATCACATTTCGACCTGCAGACGTTTCCGGTGTTTGTACGGTACTTCCTGCGAATACGGGTTTTCGTCACGAAATTTTTACTTCAACATTTAACCGGGCATGAACCGATAGTTAACTCAATGTTTGGGTCTGATCCCGCCGGGCGAGGCGGCAGGCACTTCGTCGATATAGCATGGATCGCTATGATTGAATAAAGGAGTAGCCAGGGGCGACCACTGTGAACACCTCTACTGGTCCAATCCGTGTTTGGATTATTAGCAGTGGAGTAGTTCTGCGGTTCTCAACTTTCAGCCCGAATTGGGTTAAAAGGTATAGCGTATGCGCCATGATATCGATCAGAAATCATCCGGAGCAGGTACGGAGTTGAAGACGTAGCTAAGTTCCTTTTCGACAACGAGGTCCCCGAACTAGGAGAAACCCTCATACTATTACGGGGTGGTAAGCAGCTTGCGTAGCGGCCTGCCATCCCTGGTACTTACCGGAGAAGTCGGGTATAACCTCTATATCTTATGCGTTCACCAGTAAGCTTTGGGTCGAACTACAATCCTTCTACGGAACATTTGAAAAAGCAGCAGAGTTGAGTTCTAGCCTGGAACGCATCGACTCGCTGTCCGATCTCTCTTTTGATCAGTACTTGTATCTCGAGCTATATCAGACCGGTGACATGCGCCATATTTGAAGTAGTATTCCATGACGTTGGGGCTGCTTGGTCTGCGCCTACGTCTAGCACTATCACCGCCCTTCCTTGCAATTACCTAACCGCTTAGGGTAGACCTTGGGAACACGTCCGCCTCGTTGAGTACTTCGAGATCCGTGAATGGTCGGTGATCAAAAAATCGCCACAGAGGCGAAGATGTGTAAACAACCTGCAGTACCAAATGCTGAAGATAATTCACAGCCAAGCCAGACGTTGGGTCTCGCAACTGACGGCGTTTTGATTCCATTACTTGCATAAGGGAAGCGGACCGAAGACAGTCTCACGTGGGAACGGTATCGAGATCACTATAAGCAAGTAACCATCGTAAATCAAACGATCTATTAATAGCTCGAAACCAGATGTAAGATGGTCCTTGCCCTGCCGGAATGCAATCACGAATATTGCGGGTACACTGTATTAGTATCGATCATGCTCTTTTCCCGTGAAGTTACGTAAATTTGTATATCCGGGAGCACGAGCAGAATGACTTTTATCCGGCCCGGCGACTGTCCGGAATGAGTGAGTACAATACCAACTGACTGAGTAGTATTTCTGGGACTCGGTTACCACTTTTAAGTAGCCGCATGCGAGTCACGGAGGTCTCCGGTTCAGCAAGCCTGATGACCCTCGGGGAGGACGATTCAAGGCCATGGATGAGAGTGGGCCGGTATTGTCAAAGTTGCGTTATTGAACCCGAAGCGCAATCTTGGTCTGTTAATGTATTGACTAGCGTCGGGAAAATTCTTTAACGCAGAGGCGTCTTTGATTATCAAGCGCTTGAAACTTTTTGGCGGAGTGTCACCACTATTCTCGCGAAAGGGATTAGCAAATTGAATTCAATGAGGGCACTGTCCTCTTAGCTTTCACCCCAGCCCTCTGGGTTTCCGCGTGGGTGACTATGATGTTCTACTCCCGGTCCGTGTTGGTGCACTCATGGTGAAGCTCACGGTGCTTTGGTCACCCTAATTACATTGAATTATGCATTGAAGTATGCTTGTCCATCCCTTTTATGTCAAGACGGCGCTCCGGTCTACGGAATATTACTACACCTACTCTCTGTGCTTGTCAGACTAACAAAACGACTGTGGTACCATATTTGAGACGGAATCTTGAACAAAACCACAGTCACTCACTAGAAGGACCTGTGGCGCGATTGGCCCTTTGGTGCAGAGATTACTCGAGGTGATCTGTGGCGGGAACCGCGAGCTATCGTGGTTACTTATAGCCGGGCACGGATCGCGAACTCGGCAAGACAGATGTCGTAAACACGTAGTGTTTAAGGTCGTCTCCCGATGCCCAGATTTAACCCCTTTCGCCTGACACATCCTTGATGTCCGCCGTCCCGAGCAGCAGTGCACGATCTACGTTTACGCCTCGCGCTAGGGGAAGGCGGATGGACGATGGTTTCACGTCGTGTATTGAACTCTAATCACTCAGAATAAGCATCCGTAAATCCGCTGGTAGACACCTACGCTGAGGCCACATATATGCTGTCTGGCATCCAAACTATAGAAAGCAAGTGGGCTGCGCCGGTAGGAACTCTAGCAAATCGTCTAGAGTAATGTACGGTGTATGAGGCCACGGGCGATACCTGTACTAGAATACCTGGTGTCTGGCGCCGGGCGGGGCATGCACGGGAATGCCCATCTATTCCCTGATAGCCATGGGCCCATCCTAGAGTTTCATATATTCACCCTAGGCAGATTTTACCTTAAGCATAGTTCTACATTCTTAGACGAAACGTTGTTCTGGCGCGTTTCTCATACCATGTATGCTTTAGTTACACCCGGGGATGGGCATTGTATCCCATCCGGTCTTCTTGGCGCCTATGCGGCTTTGATCAAAAGAGGATTTGTCCACGCGTGCCGTTCCGTCATACGATATGGCAGGCCAGTGCCCAACGAACTAAACCATGGCATAATAATCGCCGAATCGGTGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATCAGCTCTGCTGAATACGACGTTTTTTGCTTCGAGTGTAAGACTCACGTATCGAACCTCCGTCCTTCACTTGTAATGATGGCAGGCCGATCAGTAACATCGAATCGTACTATATCAATTGCAATACTTGTAGATGTCCGGCCTCGACAAACCAACTGTAATCGGACCCTTTCTATTTGGAATAGAGTGCGCGGTTGTTCGTTGGTTTTCCTTTATTAATGGGAATTATCAGGGTAAGAGTGTACATTTACATTATAAAGCAATGGTGTCTGGTTGTAGTGTGTCCCACACATTCACTCGTCATTTTAAGCTTTACTCAGCCTGTTGTACAGA +5 CTTCACCCGGCCGCTCGCGGCTCGTATGTCCGACAGCGACTACACCATTGTCCCTAACTTTAAACTGTAGAGCGAATGTCAACGTGGCCGTACGTCCCCGGGTATGATCTGCGGCATCGTTAGCGTGAACATGCGGTAGCCAAAACCTTATTGACCAACCTTTGGGAGGAAACCGCCTTTACAAACGCATTCTCACGATACATTCGGGGCGTAACTGTCCGAAATGATCCAAAAGAATCGGGAATAATTAGTACCTAATCAATTCGTCGTCTCGTAAGAAGGTAGCCGTTGTGTGGCGTCACTGCTGGAGGGCACTATCCGCAAAGGTTAGGCCGGGTGTACCATAGCATCCGAATTCATCCGATTGCGGTATTCCAATAGCCGTTGAATTAGTGGAATAATTTGAGGCTGGGTCTGATCATCTGACTATTATGCCTGACTCCTGCGGGATTTGAGCATGAGCGCGCGATGAGAGTTTATAGGGAATACTGGAGCGTGCAAGCTGTGCCGGCCTCTACTTTATCAACTTGGGCTCAGGGCAGGTTGTGCCGATTGTGTGTTACGTTTAACTCTGTGATGGGTCCAGGCGTATACGCAATCCGTATACTATAAAGGTACAACAACTCCACGCGACTAGTGGGGATTAATATATGACACCGGTCTGCGGTAGGGGAACCCGGGGTCGTCGGTAAACCTCGCTCTCCTCTGCTGAAGAGACGCCTGGCCACATGTCCACCTCAGCGGGCCGTACAAAGTCTTACATCAGTGATCTGCGTGCTACAAGAGTGGTAAGTATGACCTCTCTGAGTCGATTGCACTGCACTGCCGCTTTGTATGAAGTACTAGCACTGTAATGGTTTGAACAGGGGCGGCTACGCACCCGCCATTACTAGCTACTCTTTCACGACTTGATGGCTAATCGGCAGGCCTACCGTCCGCGGACGAACTCGTATTCGGAGAAAGGTAAACGCTAAAGCCGTTCGGTGCCCTTCACAGTCATCGACGTCCAGTATACCCATTCTGACTGAGCGGGAGGTAAAGCCGTGAGTATATCCGTTCGAACGGCTATTCCAGCGTACATCTTCCCCGTAGTAGAACCGTTGCAGATGTACGTAACTTCCATTATTATATGGACTCTGAACTAGGTAACGGGTCCCGTAAAGGCCGAGATAGATCACTGAAAACCCAACATGTCGCTCGGGTCCCTAATCTAGCCTCGTAGAAAACGGGTACGGAAAGGACCTATCCTACACCAAAATCCGGCACATTCAACTCAGACTTATACGATTATTAGAGTAAGTGTCCATTTAGCTAATACCTCGCGGACCCCCGAAGATACGGCGATTGATGGTAACTCGGTTTCGCATTACATCCTTTGTCTATTCGACTAAGGGATCGTCCCAGGCCCTGAACTCCAGTGAACACCCACACCGGCATGCCATGGTACACACAGTCGGAACAACTATAGAATGACGGTGGATTAACCCGAATGGAGTCCGCACCTCAGGTGTACTACAGGTACTAGGTTAGAGCGAGCTCCTCTTTTTTCTTATGGTCAAGGCAATGTACAACCGTCCGATGATCGCGCGTGTTGCGGCTCTTTTGGTCCACAAGGCTCAGGAACGGGCGAAATATCCCCGAGCGTGTGGCAGAGCCTAGGCGTGACTTACCGACGGCGTACCGGCTCATACGTATTTTACCGGGCGCTATATCATAAATATGTAATAGAAACGGAGAATCTTCCCGCCGGTTGGTTTGTTAGCAACTCTTTGTATATAGTTCTGGGCGCAAGTTTACAACAAGTTAAACGGGATTGCGCGCGGTCCGCTGGTATTAGATCCGGGGATTATAATCCGGTACGGGGTACTAAGGGAAAGTCTGTTTCGGGATTCAAGATAACCGAACCCGCCAACCGTGTAAGTTTTACAGTTGCCCTGCTGAGCGAGTCGACACCCCTTATTTAAGTATGTCCTAGCAACTTCGAGGCAAGGCAAAGAGTGTAACGGGATTAGGGCCTGACTCTATTAATGGATTGTCAGAGGTAATGGAAACCACCCACAAGGGTTTACTATGTTTTGAGACTCTTGGCCGACGTGAGCGAGCACAAACCTCCCCATATGGGCAGTCGACGTGTCTAGCCCGTGCGCCAATGAGGCAGGCGCAATGGTCCTACTTGGAGAGCCATACATTAGCCGCCGCAGCCGGACCCAGCCGCCGTCTTTCCGACGAAGTTATGCGAGCAAGCATATACGCCAGGCCAGGCTCCCAGCATGAGCCCAAAGTTTGAAGGGCTGACAGGCATCAAAAACACAATGTGTTACCATCGACCCATCGAACTTCGCTCGCTGGGTCGGTCAGCCCGGACTAAAGAGAGATAGAGAGTTTAGGGCGTTCCTCGCCTTGATTCAAGGGGTAGGGCGCCTACGACTTGATGCACTGTGTTTCACATCAGTATGCGGAGTTGTCAGCCTCAACCAATGGGAAGCCAACAACTGCGAGCACCGTTAGAGGTGCATGCGGTGTCCGCTAGACATAGTAGTACCTTCAGGCCATGCGATTTGGTTTTTGAAACCGGTCATTACGCGTAAGTGAACTCGGCCACAAGTTCGCCGAATGATCAACTTAAGCGTCCCCCACTTCAACTGCTGCACGCCAAGTGAGAACCGAACCTCAGGGTAACACATCTGGGTACACACGCCCTCCGTATTGCGAATCTCTAACACGAGTAATGGCTCACATCCGAATCACCGGCTCAGGTTAAAACTGGTGAACTGTAAAATTCCGATTCTTGGTCACTGAATGCCCGCGCTACACTGGCTAGGCCCGCTATTTCATTCTTGTTGGGACGTAGGGCGTCGCGTAGGGGGGTTGTTCACCCATAAAGCGTAGCGCCAGGTGTAGCCGGCTTTTGTTTCAGGATTATACGCAAGTGTTGGCCCCCTCAACTGGGCGTCAAGGTCGGGATGTCATTCGCTAGCAACAACGTTAGATTACCTCCGTCACCACCCAACCCTGTCAGGTTCTGAAGCATAAGGGGATCTAAAACTAACGTCGGTCCTAGGTCCAGGCACAAGTGTACTTTCGAGGCGACTTTTTCACAACGCAGCAAAACCTTACTGCGATCTGCCCTCATAAGCCAGTCAGGTCGATGCGGGATCGACGTGATTAGTTCCTTATGTGGAACGCAATACGCGTTAGATCTACCACCACGCATTCATCCATCGACGGAGGCAAATCGCTTATTTCGGTCTCGATTCTGTGTGTGGCCTTAGCGGGACCCGGATTGCGGTGAACAGTTATTTCGGACCTTGTCGGAGCCGGCGCCACGTGACACTCTCATTACGTGCCTGATGTGTCCGGATGTATGTACATAAAGCGTCCTTTGTGATGGAACAAGTCGTCGCAATTCAGGAGGTGAGGGCGGCGAGGGGTGTTGGTGACAAAAACCCCGTGGCCAGCTAGTTTTAATTCTTGATAATCCGCGGTCATCGATCCGATCCCACGCTGTCCTGATTTAAAGTACTGGCTTAATTCCACGTAGGGGTTTTCTTGGCGTGTTCCGTTTACCACGACTGTGCGGCTAGATTACGATCAGCTAAGGACTCCGCGGTGTATCCACAACGCTAGTTATCCCCACGCAGGAAAGTAGCTTTTACCATGTGTGAATTGGGGCGTGGCGTTCTCCCGAAGAATCCAGCGCACAGTTTGCCCTATGGGTAGTCCGTACATCCAAGTGCCCGTGGTATCGTCTTGAGTAGCCGCGGTAAGTATTCATGTACGATAAATCGATTACCCCTCGACGAGTATAAACTCGTGCATCTACTTTGGGTCCTTACTGTAGATGGCGCCGTGATCTAGTCATGTCAACGTCTAGCCCGTCTGGTAGCTATCGCCATGAAACTCATTCGAAAGAAATAATCACCCACCCGTAACAGACTTGGAATGTGGATTTTTGCGGATACTGAACCAGCTGTCCGCCTGGGGGGAGAACATCCCAGAGGGCGAGCAATTAGCATCGCCTAGAAATAGTCGAGGGCCTAGTCGAGATCAGAGGTTTATAAAGACCGCAGGCCCTTGGCCAACCGTTACGTCCTGGCCATTACGACTATGTAGCTCCACGGTGTGTTGAAAGTTCGACGAAAGCCGTTGTCAAGTCAGCACGGATTTAGTACCGTAATGTCGTGGTCGTTCCATAAACTTCTTTCCCCCCCCGATTTGAGTGGTCGAAAGACGAAAATCGTAGCCCAGTACTGTCTCCAGCGGAAGCCCATAGGCATAAGAAACTGACGTCATTCCACCACAAATCAAGGGCGGCCTAGACGATGCTTTCGACCGAAACGGTGCGTGAGCGTACAGCAACCGCGCCCACTAGAATTAGGAAGAAGGCTGTTATTGATGCCACTGTAAATTGTTTTTTGCACATGTTTACACAATAAATGTCACGGGGCCGTCAATGGACTATTGCCAAAGCCCACTCTGGTGGGCCGTCATGTGGTGTCGGTTCAGCAAAAGGTACATGTTCGACTATTGTAACCGTTATTCGGCCTGGGGACACCTCCCCAGATATCCCGCACCCTTAAACCGCATATCGGCCGGAAAGATCCTTACGACCAGGGAGCGGGAACTTGATCGACTGGTTGTGTCATTCTCATAACGGAAATTAGCCAGATCCCGCTGCTATATGATGAAGTGGCGTAACACGGTCACGGGCTTACTGTCCCGACAGGGGCCTTAAAGGGAGTTCGCTACCGGGACTGACAGTAGATTGACACTGGTGGTAACATTTTCGATACTGGTGGTATTACTCGTCTCATCTCCTTTTACAGTTCAGAACGTGGTTGAAGGGCTATGCAGAGCACGTGCCAAGTACGAGTTGGCTAGGATGTCTCACCGTGGAGCCGCTTAAGCCTCACGGCAGAAACATCGATTTCTCCTTGGGCGATGTTAGTTGCCACTCATTCTATGTTGTGTCCGTCGTCGTCAAATCTAACTCTATACAAGCCTCTCTCGCAGGGGTTTGATAGAGGTGACGAAAATGACACAGACCATTGGCGGCAACAAAGCCTAACTGCCTCGAAGCTTGTCCCGAGCGTTTCTGTATATACATCCTAGCTCGCTTGAATTCAGAGTCACCGGCCCGTGTGGAAGGCCAGATGTTCGTAGGCTTCTCATCCACTACCTAATTGATAGCTTGTGTTCCAACTCCGTGACAAGAGTTGACATCGTCTCTCCACCCAGGTTGCACTCGAATCGTATAATTTGAGTCCTAATCATCTGGCTCTCGCTGCGAGATGTGGAACCGCAGACGGTCCTGCCCGCTTGGGGGACGGAGGAAAGCTCCCGTTAACTATATCGCCCGTCGTGTAATATGATACTCTTATTGTCGGGTGTAAATCGAGCCCGAATACGTTTTTTTGTCGTCTAACAAGCTCTACCTCATGCTTAGTATCCACCGCCGACGTAGCTCTCAAACATATTTAGTACTCCGCGACGATGAGCACAGTCGTGTGTAGTTGGTTTACATTGATTAACATAATGCCCGTTGGTCGACATGACCTACAATACTATTAAACTGCCGACGAGTGCTATTCTTCGCGGCCCACTTTACGGGCAATCATTCCATTTTCGCTTCCTCGCAGTCCATAGCATTGGAAGCGGTAGATGCCCGATATGTCCGTTGGAGTCGAACAAACACCTGTTGATTTAGAACCCAAAGAGTCGGGGACTGTTGCGCGGGTTCACCTAAAGAGGACACGTACCAACGTACCAAGAGAAGTTTCGGGGGTTTGATAGCCGGCGATCAATCTGTACGACTGTTCTATTATCTAATTGGTAAGCACAGGATAATGCGCAGGGGAGGTAGTAGCGTTGTCTCTATGCCAGTGTGATGTGAAATTCTCGTTAAACGGTTGATGACCTATCGTTTTGGCATTCGCTCCCCAGCCAGGTGTAGTGGTAGCCGTTGTTTGAGGTACTTGTCGCAAAAGAGTCTGTGCCGTGGTTGTCGTGTCTTATCTCCGATATCTCATCTAGAGAACTTATCTCACGGGAGATTGCCGGCGGCTAACGGTCCTGTGCACTCCGTCGTGAGAGTATGGCCTGCTCCGATAGACGAGGTTTGTCAGTCTGTAACGCCGCCGTTTGTGCGCCCAATTTCAAGCCCATCAACCTCTAGAGCCTAGATAGAAACGCTTTGCAGTGAGGGTACGATGGAGGTATGTCTGGGGTTCTGACTAATTCACAGTAAATAGAGTGAAACGTTTAATGGACAGGTTGGGAAGATGTAGATGCCGCTCTGGGTGTGAAGTTCTACGTATCAGTATGAATCGAAAATACACTCTATCTCATATGACCATTAATAAGTCTTTTGCAGGTGTCGCGCTTGATCCCAGCCGTTATGGCCCCCTTTTGGTCATCCGGACAGAACCTGGTATTAAATATATACCCATCGCTAACCGCCCATAATCGTGTGATTCGACCGAGATCTCGTGCCTATCGCTTATCTGTGCATGTTGTCTGTAGATCACCTCTTTTTGAACGCTTCCTTGCAATCGAGAGTAGCACATCAGCAAAGTAGGTATCTCAGCGAATAAGGTTTATAAGATATGTTGCCGTGCGCGTCCCATTTGGCAACTATCTAGAATACTGCTGCGAAGCGGCATAGGTTGGAGGAGCGAGCCATGAACGGGGTTAAGTGGTCTAATGAACCTCGGACATGAACCATCAGCATCTCTTCATCCCTGTCCTGGAGACCTCCTGCGACATTAAGACACGGAACCTCGTTAACCATCGGTTGTCGGCGGCTCGTGGAGCCTGGAAGTATTATCCTGTGTCTTATCGCCGCTTCACTTGTCTTCGGGCCGAGGTCACGCGCCGTACACTAGCGGCAGTTGCTGGTGCGGGGTTTCATCTCGCATCGGTTGAAATTACTCAAGGTCCGGCATTGATACAAAAGGTTTTCGTGATCAGATTGACACGACCGCTTCGACCAGTCAGCCGTTGTACACTTGAGATTCCCGTTACCCGCTTATAGAAGCGAAGGTATGGGATAAAGAGATTGACAAGTACGAGAGACTCGCGAATGGTGTTTCTGACTTTTACCCTAACATTACTGGTACCCAGACCTGTATATACCGGCTCTACAAATCGGGTTATACTATCTAGTCTTCTGCGATTAGGCGTACAAGAATTCCCTAGTTCCTCGTTAACCTAGCATCCGGGCCTGTTAGATTTGACCGAACCAATCCGTATTCTTTCCATGCGTTCCGCTGTTAAGGCCGGTCGAGGAATTCATGCCTAGTATGCATCCTGTTCGGCACGGCGGCGCCGCCGTACGAAAAGATACTGAATCGACTCGTGAACCCAATGTGACTGCCCCCGTCGTCGTTATTTCGGCTCCGGTGATACATAAAGATCTATTCGGCGAACCGCGGGGCCATCAAATGTATCGTACTCAAGCACGGCCGCCCTGAGTGAGTTCCATTATCATGCAGCATGCAAGTGGCGAGTGTCTCGAACCACCAATTGCACCACGCACTCTCGTCCTCCACTACACACCCTCATTGTTGTTCGGGCCTTCGCTCTCAAATGCTGCAGAGCAACAATAGCGCTACCAACCGCATTATGTCCAAGTGTTCTTTGGGGCTCACTGCTCTGAGACAGAACGATGAGAGGACCTCTGAGGTAGTTCAAGATGCGGCCGGCGTGAGCACGAGCACCCGGCCGTCCTTTAGACAATCTTGGGCAGAACCATTTACTATAAGACGTTCGACAGGTGTGAAAATGTGTTAAACTTAGACGTGCCCCGTACTGTCGTTGAGGGTTCACACATCGAAACAGTGATCGAAGAAGCGGATTAGTAGGGCCATGGAGATATTGGTACAATGCATGCGACTCAGCTGTTCTACAAGAATGGGCCAGACTCTACGAGGACCGCTGCCCACGAATTTGACCTCGGAACACGTCCGTCGGGCCCAAGTATCCCAAATCGTTTTGAATATCGGCCGCTCTGAGCCGACGTCCCAGTCCCGACGGTTGCTCCGCTGAGTGACGTCGACTTGTAAGTATTACCGAACATAAGCCCCCAGTATAAATCATCTTAACCACAAAACACCAGACGGGTAGGGCGCTTACGCATACCTGACGCCGTAGACTCATTGGGGATAACAAAGGGTAATCCGCTCTTCATAGGCGACACTGACACTGGGGCTACATCCAGGATTGAGAGCTTGGCAGGGATTAGCGCCGAGTTGTCCCCCATTTCATACAATGGAGATAATATGCACATTGTTACAGAGATGCTAGAATTCTACAAATCATAGTTTGGTCCGCTCCTAATCTCGGCATTTGTGCCGATGGATCGGTGTCTGCCTAAGCGCGTGGCTATTTATCATCACTATTAGATGTATTCGGCAAGTGATGGTCCTTTTCTGGCCTCACTCAAGATAACCTCTAGGTTACGTAGATGACTAGTACTAGTCCTCAGCATATTCAGCTAACGCCGAAAGTCCTTTCTTGGTACTTGACAACATCACTCTGTACAGCTTTCCTGTCGTTAGAGGGGTGGGTATACTTGCGCGAGAATTCCTGCGCCGAGGCGACATCCCGCCGTCGGGACATCTCAGGTATACACGACATCGCACCGTAGGGCCCGGTATTTGGGCTATGCCTGTTCTGCGCCAGCATGGCGCACACCAATGCGTTCGGCAAAGTACAGAGTCTATTATCATGGCGTTTGGCCCTATCGAGTCATTATGATGGACCGATGACGGCTTACAGGAGGATGGGTGTTGTTTTCCGCGGCATCCCAAAAGCCCTTAGACATGTATCCGTTGTTATGCGTACTGCGTTATCTTTAATGTTATTTCAGGAGTATCAAAGAAGTAGTGTTGAGTAATTATAAGCAGACGACCGGCAAACAAAATTCAACGCGCGGCCGCGATTAGCCATGTACAGCTTGTGGTCGACCGAATGAGTACTCCCACGTAGTCATTGATGCACTCTTGAGTTCAGCCGCCCTGCGTATGGACGCTTATGACTATCTAAACCAGCAGGTCCGAATAAATCGGCATAACGATATCCGGTTGGTTATTGAGTCATACCACACGCGGTCTGCAACTGTTCGCCCCGCCATAACTTGCTGGACTTTCGCTCACGTCTTATACTGTGGTCATCCACGCTAGTTGTACTATCGAAAAACGCTTTTCGTGTGCAGTTGGGATGGGCCCAATATCCCTCAGATCGGTGTCCATTTGAGCAGTAGTATCGTTCGTCGCTCCTGCAGAAGCGCATCTCGCCTGAGTGTATAGTACCCTCTGAGGTTCTGAGCAATAAAAGGGCCAACTCGTTTCTTGATGCACCTTCGTAACTCCGTTCCTACACTTCGTCATAGAAAATCCTCGCCATTTTCCTCAGATAATCCTTTTTATGGTACCGGAATCTTTCGGCTATGATAAGCTGGCCCAAAAGGACACTCTCGTTCCCGACTTCCTCGTAAGGTTGCTGACGGACTGGATTTGACGCGGTCATTATAGATCGTTTCCTGAAGGTACGCGCTCTACTCCATACTTTGGGGGTGGTCGTGGTTGAAGTTAAATGATCCATGTGCCTACGTCTGAACTCATTGTTCTAGGATTCGTAGGGGACCCGCCATGCCTCGATTTATATACCTTTAGCTACCTATGCCATCGAAGGTAGAATCCCATAAACTGGGATCACGCTATTGTGAATTCATTCTCACTTGGATCACCCGATATCGATGGTTCGAAGACTGAATGGCAGGACTGGTGATGTCTGCAGCAGTCATAGTCGCTTAAATTGGCCAGCTCTAGAAAGAGCTGATAAGCCCATGGGCCAGCGACGAGTTTAGTATAAGTCGAAGGTACAGCCTTTAAGCATGAAAAACTTACCAAGGTATGGCTCTGGGCTGGCCCAGCACCACTGTTAGTCGCCAGAGAGGCTCTAATAGGTAATCGCCATCTGGGGATAAGCAACATAGGAC +6 TCCCATGGGCTCAAACAAAGGAGATGCCCCCTATCACTCATGCAGGTTTGCGTTTTAAATCTGCGATGCTCCCTTGGATGGTACGTTGCTAAGAGCACGCGAAAACTTGTCTCTTAGTGCGCGGACATTGTGGGGACTCTCATCAACATCTCTGAAGGGGCCTATGAAGCGCCCTCTTTCGGGACCCTCTCAGCGTATCGAAGTGGCAACCGCTACGGGGTCCCATCCTTGGAAGCCATACGCACGGTCATAATTGATGCGAACAGCCCGGCTTGCACAAGAAGCCCATGATCTCCAGCACCGCACCTAGGTCTAGATAGCACCATTCTTTTACTGAACCGCAGCCACCGACGAATGCCCGTTCCCTATTAACGCACAGGAGTTCGCAGTTGAATCTAAGGCCTGCAGCACGAGCACCGATGCGGCTTGTCGGCCATAGGCGGAGGGGCTGCTGGTCGCAGAAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCAGAGACTCATTGCCCAGGAATAGATTCGCCAGGATGGCAACTAGCGAGACGTAACTTGGTTGCACAGTCCTGGCTAGCAATCAATGTAGCGCCCATTTTGCGACGCAGGAGTACAAATCCCAACAGCGGTACACTCGCGTGTTGAAGGTAACGTACGCCCGATTTCTTGCAGCACATACGGCGCGATTTTCCCCGATTGAAGGTTGTTCTGACCCCCAAGCTTCGGGACGATGAGTTCAACGGACGTTCAAAGATCTTTGCCGCAAAATCAGGCCGTCATGGGTATTACGTGGCCCTCGGTCCGGCTATTTCTCGTATTTAAAGCCATGTCGTATCCTCTTTGGAAGGTGATAAATCCGGGGGGATAGGGCCGTACACGAGACTGAACCCCTAATGCGTACATTGATGATAGATCTGCTGGCTAGCCGATGCTAAAACGGTGGGTCATCAAACTGAGTGCGAGGGTTACTGACACCAGAACCAAGTTCGAAGTTCTGCCCCAAGTTAAACTGTTCTGTCAACTTTGTCTGGTATTCGTGTAACCGCGCATATCTAATCCGTTTTTTTATAAGACGTCAGGAAGACCTTCTCAGGGTGAGCACAGAATTGGACGATCGCCGTGCCGCCCGTTTACTTGTAATCGATCAAGTCTCATTTCGGCACATCATTTCAAAATTTTGCACGAAGCCCCAGACGGTCGTGAGTTAGTATCCATTCAGTCTACAAAGTCGAGAGCACAGGGCTATACCAAAACACGTCTAGTTACGGTAGTTTGTACGAGTCTTTGTTTTATACGCGACACCTGGCGCCTGAGGTTGTATACACGTCGCGCACTCCATCCTGACCTTACCTCACGCTTCTTGCAAAAAGTCGTGCCACCGGTCGCAAATTGATGTTCGAGGGAGTTCTAGTCCCGGCGTGATCATGACGTAACCATACTGCAGCTAAGAAGTCAGAACCGCGGGAGAGAGAGAATTGTTATAAGCCGGCGGGGAAGCATTTCGAGTTTTCCGACGGTTCTGTCTGTGAATATGCCTGCTCGGCACGCCGCGTCCTCATTACCGACCCTGTATAATGTCGAAGTTAAGGCAGCCACCCCGAGCGGCGCCGTTACCCTTCTTCTAGGTGAAGTCAAGCTCGACCGACAGTACTCCAAGGGTTCTACGTGAGTTATTTATGTCAGCAGGCTGGCTCTTGAGGGGAGAACAGCAATAGTCGGGTATTAAGTCTCCGGCATTATTTACAGCTCTTAAGACAACTAGGCGGCCTTTCGCGACCCAATGATGGGGTAAGTGTGCTGAGACGTCGCTGATATGCACCTTCCGATAAACCTCGGCCTCTCGTCATTGGCATGACAAAGAAGAATCTAACCAAGTCCGCGCTCAAGTAATTCAAGAATTTCTGTGAATGCCCCTGCTTGGCTAGCTTAACCTTACGCATGCGCTTATACTCGATCTGAAATATGATTTCGATGTGTTGGCGCGGACATGGTGGTGGCGAATTCGCCTCTTTGCATTTTACCCTTGCAATTTGCCAATATTTGCATATCTGCTTCAGCCCTGACTCCAGGGGCCCATTCGTATCGTCCGTCGTGGACGGAAATACGAGTGAACTAATCGGAGTTTGTGAACATGGAACGGTCCGCACCCAGGAGATCCGTCTAGTTCTTTGTCCTATCAGCGTGTTCCCCCTGGGCGACGAAGGCCAGAGTATACTCACGACGCCCATATGGGTCAAATCTTTAGCTACTACAGCAGTATATCTTACAAATCGGTCACTGGCTTCCACGTGTGAGTCGAAGCAACGATACCGATGCAGCTCGCTGTCCAGACCTTTGTGGCAGACTCCAGCGGAAGTCGGCAAAGGTAACCACACCTGATAACTTTGTATGTCTACTGTTCCTTTAAGACTTCTCCCCCCTCGTTTGTAGGACAGCTTGCCGGACAAGGACATTGGATTGCGACGCCGAGGGCCATTATCCCGTGAACGCCTCTCCGCCCAGAGAGCATGGGTGATTAAATCAGGAGGACTTTATCTTCGTCCCAAGTCAGGCCTCTGATGATCACTACTTGAATGCGATAGGAGCTAGAAAAGAGGGTACAATTGAGTTTCCTTACGTAAGATCCTGATTCTTTGGGGATTGTGATACAGTAAGCGTCAGGAGAACGCAAGTGGGCAAGTCGGATTATTGAAGGTGCCACTGTTGTGGATAACGACTGGATCTAGTTCGCTGACTCAGGACCTGATTGTGGTTCAGTTCCACTCAGGTTCAAAGAAAGTGGGCGACTCCAGGTTTAGGAGTCAGGCCGAGTATCAGATGTTTCCGTGTAGGGTGAGACTCGGCTTCATCGTACGCGTCCAGCTCGACAGTTAGCCAGTCACGGGGACATATTGATCAAGCGGTATCGCAGCCGGCCTTAAGAGCGCCGGGTCCGTCAATCACTGAGCGCGCGCCAGGGGGGAAAGGATACATATGAGAAAGGAGGAACTTAGTTAGCACGTTTCACTCCAGATGGCGTTGCTAAGCGACCAGTGGTCTCGATTCATCTTCTTGAGCTACTTACGGGTCTGTCCTACTGTTTTGGCATAGGGGTGAGAACAGAGCAGCCAGGTATGCTTTCGACCTCTGTCTAACGCCGTAGCGGAGCTTACTAGCGTCAAACTGATACCCATCATGCATTATATCGCAAGACAATTCGGTCAAGCTCATGCGTAGACAAATTCCTCTTTGAACTAGGCGGACGCCTACGATGCGTAATCACGACTTAAAAGCGCCCTGCCGATTGAGATAATACAAGCGACACATACAGTACAAAAAGTCGCTTGTGTTGATCGGTTTAGCTATGTCCATGATTAGAGTGACCTGGAACTGCAGAGCACCGCAAGGCTTTCCACTGCTTGAAAAAACGGACAGTGTCGCGTAAATCGTAGTTAGGCATCAGCGAAGATTGTAGCCGTTCACTTGGACAGCGGAGACCAATAGCGCTAGGAGTGCACCACCTTTGGCAGTCTCTCTCCTGCGGATAAATTTACCAGATTCAGTGTACACCTATTGTTAAGCCATCCGGACTGGTGTTTTGGGACTACTCTACATTGATTAACCAAAGGACAGGATCCTAACTTGTCTTTTATTCATAGTGATCTAATGTCTACCGAGATATTCAATCACCTTGGGTGGGTTATATCAACACTAAGCCAACATGTTGATCACAAACCCGCTGACCCATGGTGCAGAATGGCCCCACAACACTCCCTGGCTACCCGAATAAGTTTTCCATGATTCCTCGGCAATTCTCCAAGATACCGAGCCTGAACAACCGGAGACCCCGCCTAATGACCCTTCTAATATCTCATCCATGTAGTCACTTTAAGAGTAGGTTGCTTTAGGACGGATGCCCTGCTGCGAGTAGTGATAGTGAGTGACACACAGCTAGGTCCTGACGTACCCGCCTAGGCTACGCTCTTAGGGAACGTAGATGATTATAACACAGCACCTTGCGCGGTCGCCCCGCATCGGCTACGTCACTCATACTCTGACCCAGCGTTACAAAATTTGTATTGCTGTTTGCTCGCAAGTCGTTTGTAAAGACTGTACCTTAATCTGCGAGCGCAAATAATGCATCGGCCCAGTCTTCCTCCATTGCGAGACTACAACATAAAACACTGAACTGCCGTTTAAATATTATTGCTAATTCACTTAAATAACGGAAGCAGAAAGCTTCCCCCGAATTAGTTGTTCTTCGGAATTCTAGGCAGCGTAACAGTCCACTAGAGCTCTGGCTTCCCGTAACGCCTGTCTAAGTCCTATTAAGCTTGGACCGGGTCCCACATAAAGCAAGCAGCGGTCCTCGTCACGACATGGTTGGAAACAGCCATTTTATGCACCCATAGTACGTATTGTGATTGGCCACCGTCTCCGATGATCGGAGGTCAAGCCGAGGAAAGGGGAAATTTCCGGGCTGCCCGGTTGAAAGTGTCCGCTCGCATCGTTCGGCTGGCACACTACCACGATCATTGTTGTTGGAGCAGAATAGGGGTCCAAGCGGGTGCACACGCATGTGGCAAGGATTACCAGTATTTAACAAGGTTGGCGGTTTCACGGGTCAACTTTCATATCACCATCCGCGGGATAGCTCTAGGTTGTGCAATGCCTATACGCGGCGGTCCAGGCACTGCCGATCTTAAGAAATGTCAAGCAGTTTCATGGTCATGGTGTCGGAAGGGATTGACGCGCGGGCATCTCCGAGCTGAGACGGCATAAAGCGGCCACAAAATTCGTAGAAAACCTGGAATTATAGCCATGGTGGGCACGATAAAACACAAACAGACGCAGAGCTCTGCATCTTGAGGCCCTGTGATCGTCAACCCTCAACCGTATGACTTATGTGCCGGAGCCTCCTTCAAAAGTATAATATAAGGAATGGCGTGGGGGTTAGTTCGTCTCATGCCCCCCGAAACCGTCGGACCCCGTGTCCTCGTACGGGGTGCCTTCTTGGTTACTACACGATTAGAAGGAAAAGTACCCGACCTTGGACGCCACACATGTGGCACGGGTCACCCACGTAATATTAACGCAGATTGGATCTTACTGACTATTTTGCAGATCAAGGCCTATCTGTGTGATGATCGGGTACACGCACTTCTAATAGGGCGACCGATATGACACACTTCGCGGAAGCTTAGAACCCTTCCCGTTTGTTATCAACGGTAAATCCCGGATCCGACTCGCTACCTCAGAACCGACTCCTTGTTAGGGTTGGCGAGCACGGGGATTGTTCTGTGTAGCACTAATGCGCGTGCAAGGCCCCACTCTCCATGTAAGGTGAACGCCGCGACCGAAGCCGACAAAAGGCCAATGATTGAAAGTCAGACATGCCATAGGCCGCAGTGCTCCTGCGATGTCGAAAACTGCGGTAGACTGGTGGGTGATATAACAGGCCACGGCAAGCGGTCATGATGAGGCCCTAATGGACATAATGCTGTCTACCGCCTATGTACCCGGGCGACCGTGAGCATGTCATTGAGGCACTTCCTTGTCTTTCACGAGTACCTACAATACTTATGAATACCTGCGCTCATCAGTTTGAATACGTTACCAGCGGCATCAGAACAGGGCATTCTAATAAAGCCGTCTAGTAAAGCTACGCACTTACACCACGTAGTTATAGTGAGTTATCCGTAACAAACACGAGTTATAGCCTTGTGAGGCCGCGCACGTACGAAAGTTTATATCTTTTGTTTGGTACATCCTGGGGGATAAATTGTTATCACTCGAGCTGGTTTATGGTTGAAATACAACCCTGCCCTTTCTCAGCTTATACCCGAGTCCCATGTGCACAAACTATGAAAGCGGAATAATAGATCGTGTTGACTTCGCCGAGTTGTAATGAGACATATGCGACGGTGTCAGATTGAATGCCTCTCCCTGCACGCATTGCATACCTCATTCTTAGGTCAACTTGCATTAGCCCCTGTGCTCCCGATCATACCTTGTGAGTAACACATTATTTCTCTCAAGGTATGACCCAGTCGATGCTGGCCTATCTACGGTTGCCTCTTGTACTTCCTTATTCTTGCTTCCGAATTATAGGATTCGGCATTACACATGTCCTTTTCATGCGCCGACTCGATTGAATCCCTTTACTCTTCAGGGCTCGTTTACTGCGCTGTAGTGGAAGTGACACTGTTCCATTTAATCGTCCATGCGAGAGCTCTATTAAAATAGACGGGTGGTAAGAGGGGTATCGACCAAACAAGTATACAATTTTCCCCTAGGATGTTCATGAGAACGTGGGAGGTACTGCCAAGTCCGCTTCCGTTGCGTGGATACCTCCTCTTCACCATTGTTTGCTTGGTATCCTAACGTTGGATGATACGAGTCTACCAGTCTTTCCTATTTACAGTACAAAGAAGAATTCTGTCTGACGGATCCTCGTGAGACGTCCTGCATCAGGCGAAACGGCTTCACGCATTCGTAAGAGAAAGGCAAAGATCTTGGGCGAATAGTAACGGCCAATTACGCTAATTCTCGCTCTGCAATTGATTACACTTGCTACTAAGCCGCTGTGGGCAGATCTATGACTAGTTGTACCAGACATAGACGTTTTCATGCCGACAATTGGCTGGAGAGAGGTGGTACCCGGGACTACTGGCAAGCTCTTATGTGTATTTTGGTCCAGGGTGAACGACGACGGGAGGACGGTCACCTCCTTTTATCGCTGATCGGCTTGGACTGGATCTGTGTACCCCGATGCCTAGAGTTGTGGGACGCTTGCCGCCCCTTAGGAGATAAGCCTGAACAGTCGCCAACGAGCATCAGTGAGGGCAATCACTCCGTGCGCCAAATGCAATGTGGATCCCCCGTGCCTGGCTCTGTAAGAAGGATATATACCGAACAGCGGAAGTCAAACTGGATTTTACTCGGTTCTAAGGTTCTCCGGTTGAAAATAGTACATTCCTGACAAAAAGTTGAATCTTGCCAGTCAGCGGCGGCCCAAATCACATTTCGACATCCAGACGTTTCCGGTGTCTGCAATGTACTTCCATCGAGTACGGGTTTTCGTCACGAAATTTTTACTTCCAGATTTAACCGCGCACGAACCGATAGTTAACTCAATGAGTGGGTCTGATCCCGCTGGGCGAGGCGGCTTGCACTTCGGCGCTAGAGCATGGAACGCAATGATTCAAAAAATGAGTAACCAGGGGGGAACACTGTCATTACATACACCGGTCCGATCTGCGTTTGCATTAGTAGCATGGGAGTGTATCTGCGGGACTCAATTTTAAGCGCGAACTGGGTTGAATGGTATAGGATATGCCCTATGAAATGGATCAGAAATCACCCGGAGCAAGTTAGGAGTTGTGGAGGTTGCTAAGATCCTTTTCGGCAGCAAGGTGCCCGGAATTCGAGAAACCATGATAATATTAAGGGGCGGTAGGCAGCTTGTGTAGCGGCTTGCCACCCCTGGTAGATAACAGAGAAGTCGGGTATAGATCATTTATCTTTTGCGTTCCCCATTAAGCATGGAGTCGAGATAAAATCGTCATACGGAACGTTTGAAAAGGCAGAAGCCCTTAGTTATAGTCTGGATCGCAGCGACTCGCTGTGCGATCTCTCTTTTGATAAGTAGTTGGATCTCGAGATATTTCAGCCCGGTGACATGCGCGATATATGAACTAGGTTTCCATGACGTTGCGGCTATCGCGCGCGCGTCGACGTCTAGCAATATCCCCGAGCTTCCTTGCAATTACCTAACCAGTTAGGGTTGACCGTGGGAATATGTCCGCCGCGTTGAGAACTTCGAGATCCGTGAATGGTCAATGATCAAAGAATCGTCGCAGAGGGGAAGGTGTCTAAAGAAGCTACATCACCAAATGCTGAACATAATTCCCAGCCAAGCCAAACGTTGTGTCTCGCAACTGCCGGTGTTGTCATTCCATAACTTGCATGAGGGAAGGGGACCGAATACAGTCTCAGGTGGGAACGGCATCTAGACCACTACAAAAAAGATACCATCGTAAATCAATCTAGCTATTAATAGCTCGAGAACTAATGTAAAATGATCCTTGCCCTTCAAGAATGAAACCAGGAATCTTGCGGGTACACTTTATGTGTATCGATCATCCGCGTTTACCGTGAAGTTATGTAAATTTGTAAATCCGAGGACACGAGCAGAACGACTTTTAGGCGTCCCGGAGTCTGTTCGGAATGAATGAGTACAATACCAAATGACTGAGTAGAATTTCTGGGTGTCGATTAACACTTTCAAGTAGTGGCAAGCGAGTCACAGTGGTCTCCGGTTAAGCACGCCTGATGACCCTTGGGAAGTACGATTCAAGGCCATGGATGAGAGGGGGCCGGAATTGTCCAAGTTAGTTTATTGAAACCCAAGCGCAATCTTGTTGTGTGAATGTATTAACAAGCGTCGGGAAAATCCTTTAACGCAGAGGCGTGTATGATTATCAAGCGCTCGAATCTTTTAGGCGGAGTGTCACCACTAGTATCGCGAAAGGGATTAGCAAATTGTATTCGCTGAGGGCACAGTCGTCATAGGTTTCAACCAAGCCCTCTCAGTTTTCGCGTGGGTTACTTTCATGGACTTCTGCGGGTCCGTGTTGGTGCACTCATGGCGAAGCTCACGGTGCTATTGGTAACCTAATTAGAATGAATTATGCTTGGACGTATGGTTGTCCTCCCCATTTATGTTAAGACGGCGCCCCTGTCAACTGGATATTACTACACCCAGTCTCTATGCCTCTGAGACTAACAAAACGTCTGTGGTACCATATTGTAGACGCTCTCTTCAACAAGACCACACTCACTCACTCGGGGGAGCTGTGGCGAGATTGACCCTTTGCTGCAGTCGTTGCCCGATGTGATCGGTGGCGGTAACCGCGAGATATGGTGTTTACTTATAACTGGGCACTGAACGCGAACTCGGCAAGACAGATGCCCTATACTCGTTGTGTTTAGGGTCGTCGCCAGATGCCCAGATTTAACTCCATTCGCCTGGAACATCGTTGTTGCGCGTCGTCACGAGTTGCAGTGCACGATCTTCGTTTACACCGCGCGCTAGGGGAAGGCGGAGTGTCGATAGTTACGCGAGCTGTATTTACATCTAATCACTCAGACTAAGCCCCCTCAAATCCGCTGGTTGACAACTCAGCTGAGGTCCCATAAAGTCTATCTGGTATCCCATGTAGAGAAAGCAAGCGGACTGCGCCAGGAGGAGCTGTGGCAAATCGACTAGAGTAATGCACGATGTATGAGGCTACGTGCGATACTACTACTACAATTTCCGGTGTTTGGCGCCGGGGGGGGCATGCACGGGTATGCCCGTCTATTCCCTGATAGTCACGCGGGCATCCTAGAGTTTCATATAGTCACCCTAGCCTGCTTTTACCTTAAGCATAGTGCTACATTCTTGTACGAACCTTTGTTCTGGCGCGTTTCTCATACCTTGTCTGGTTTAGTTACACCCGGGGATTGGCATTGTATACAATCCCGTCTTCTTCGCGCCTATGTGGATTTAATCGAAGGAGGATTTGTCGCCACGTGCCGCTCCGTCATACGATATGGCAGGCCGGTGCCCACCTAACTAAACTATGGCATAATAATCGCCGACTGGATGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATGAGCTCTGCTGGATACGACGTTTTTCGCTTCGATTGTAAGACGAAGGTATCGAACATATATCCTTCACTTGTAAGGATGGCAGGCCGATCAGTCAGCTTGAATCGTACTATGTCAATTGCAATACATGTAGATGGCCGTCCTCTCGAAACCGACTGTTATCGGAGCTTTTCTTTGTGGTATTGAGTGTGCGGTTGTTCGATGGTTTTCCTTTATTACTGGGCATTATGAGGGTAATAGTGTACATCTACATTATGAAGCAACGGCGACTGGTTGTAGTGTTTCATACACATTCACTCGGCAATTTAGGCTTTTCTCAGCCTGTTGTACCGC +7 TAGACGGTCCACACGTCTGAGATTAACTGACCTCCTAGCATCAACATTTCCCTGAGCGAGGTAAATTCATTCGTAACAGCCTTGATGCGGCGGGTTTGAAGAGCTCGCGGCAACTGACACTGCATTTGACATTCATACACGCTTGGCTCCTATCCTAGACCCCTCGTCACTAAATACCCAAGTCGGGGAACAACTTACAGGAGGTCCGGCTGAAATTTTTGAATATTTACAGAAGTAACTTATTATTTTGACGTCGTACTTGTTAAGAACGTTTAGCATGACATCCTAAGGACTTAGCCTGAATACTAAACTACATCCGGTCCATGTTTTCACCGGCCCACCGAAGAGTCTCACACAAAACGTTTCCTCTACATTCCACTGTTATGTTTTGATTACGTGAATTGCGCGGTGACCGAGAGCGCGGACAGGCGGATCGCATGTTATCAAGCGACCCCGCCTAGCCGGGCTGAACAAAGCAGTTGGGTTGAGAGTTATCCGTAAGTTCTCTTCTTTCGACGGCTATAGAGTACTGGCTGGACTAGCTGATTGTTCATTGGAGGATTAGCCTGATTGACGGAATCTAACGGTACCGTTCTGGCCGGCCAAGCATGCAACTTGGAAGTGGTTAACGGTTCGCTTATTTCGATTCAAGTCTGCCTTAAAGGGCTACGGGGCCTTCCGCCCAAAAACGGCAAATCCGGTGTCATGTAATTACAGACCGATCCTCGAGGGGAAACTCGGAATCCACGTTGGATCGGAAGGCCGCTGCTGCCTACGCACGGACGAATATGCCGCTCGTACACAGTTATAATTATATGCTCAGAGACTCCCTGTCGGGTTCCTAAATCCCAGTGCAATACTGTTCCGAATTCTATTCTGGCGAATGTAAAGCGCATATGCCGTGGGATTGATCCGATCGACTCGTACCGATACGCTATTTGGAACCACTCCAACGCAGCTGCAACTCTTCTGGACAATTGTAACGAACCAAGAGAGGCTCTACGACTTTACACTGGCACGGCAGGCCGGCTGGTGCAATCGTCAGAATATACGCTGGAAGGAAAAGTTCGTGCCGTCCTTAGCGAAGATTGGCCTTTCTTGCACACCAGTGGGGGATTTGATTCTCCTCAAGCAGACGTCCTGACACGGGAACCAAAAACTCCCAGTCGGAAATAACGAGAATAGATAGCATGATCAGAAAGGACCAACAGGAGCGTGACGCTGAGCGCTGTGACCCGAAGTGTGAGACTTTGGTAGAGCGTTCGTAAAGCGCGCATAATGAGAAGTAGCCCTAGGGGGTGTTCAACCGCGCACGGCACCGACGATTCACCTGACCGGCGGATCAGCCTGCCTTCGGTGTTGCGATTGAATATTTACCAGTTAGAAACAATGTTCCATGGCAGACTGAAAGTTAGAAGGCGAACTCGTCCGGGTGGCCTTATGACGTTTGGACGTCACATAACAATTTCTCGGCCGGGAGAGGTGAAAGCTTTCTTAGAGTGCAACACTTTGGAGCGGGAACGCGGTTGCCATAGCTAAATAATGAAATAGCTGTACTCCTAGTAGAACTCCAGTATTGTAACTGGACCCGGTGTTTCAGGTGAAGAGACGAGTCAGCAGAGTTAAAGCTTATTTGTATGAGTTAAGCCCGCGAGGGACCAGATGGAGTCCGAGCCTTAGCGTGAACCCACTAACAAATGACCGATCGCTACATAAAGGTGGGGGTGGTTGTCGGACAATTGGAGTTGGTAACACGTCCTACATTCAAGCGGCTCCTAAAACATGAATGGCATATGATATCAGTTGGTGCTGCACATCTGCTCAACTGGAGCCCCATCACTCAATAAGGATCCTTGATGGCGGAAGAGCGTTGTGGTCTACTGGACCCGAGACCTTCTTGCTGGTCTCGACAGTGAAAGCTCACCTTAACTGCCTACTTTAACGAGATAGGACCTTCCACGCCCCGCGTGCGGAAAGCAATCACTCTCGCTAACCCGATCAATAATTTGAGGAATACTAAAACGGCAGGGTGCCGTCGAGCTATCTAAAATGACTAGCCACCGTCTCGGAACTTAATGGCTCTGGAAAGTATTCATGGGATGTATTCGTAGTGATCTTGACGTCGTTACGGCGAGTATTTACCCATTGAGGTCCATTGCAGACACGGACACCGCAGCAGGCCAATGCCGCACGGGTTCTAGAATTGGGTACGCGCATGCAAGCATAATAACGTAACTAGGAGAGGACCATCTAATGCGCTTGATGCTTAAGTATGGGTCGAGATAGAAGCCCGCGTAGGTAGTAGCGCCGGTCGCTTGCTATCATACCCATCACAACGCTTGTATACATTTCCAAACGCCTAAGGGGTCGGAAGGGGGCACCCCCGGGACATACTCCCCACACCCGCTTCTATGAATAAGCTGGAAACGCGCGATTATAAACGAGGGGCGTCCACTATAAGCCATCAATGCTCGCAGACCTTGAGAATTAGCGATCAGCTCAGGGTGCCGTCACATAATACAGATGCTGCTGCATCGCGCAGTCCAGTTCGCTGTTACATCACGAGTACATCGAACCGCAGCGAGCTATGGCCCGCATACTCTTAAGAAGAGGCAATGTGCACCGCGCTCTCCAATAGATAACGTAAGCGAGTACAGGGACATTTTTCTGCCTTGAATGCCACCGCCACTTGAGTACGAGCAGAGTATTGCATCATGCAACCACAGATCGACCTGGGGAAATTACGCCTTTGAGCGTTGTATCTCACGAAATGAACTACACATTAAGGCGTTTCGAGCTAGCGTGCCGACTCCAGATTTTGAGCCCGGGATAAATTAGTGACAGATGCCGAATACAAACGGGCGATCTGTCTTCAAGGCTCTTGCCAAACCACGAGGCTAAATGGGACCGAGAAGTCACCCAACCCGTTCGGCAATTGTGGGTGCTTTACCTCTAACACCGCGACTCGGCGAACCAGTCAACGAAACTACTTAATCTCATATTCGCGCGGCGCTCCCGAGATCTTCTCGACACGTAACAGGAAATATGCGTTATCAACTCTTTTCCAGTGTGGTTAAATGCTCATACCTGGAAACAATGATGCTCAGCGTAATAGAGAAATGTGCACGAGCCGTAGCGCGGCTCCTGAACCATACGGAAGTGTCCAGGAATAACCCAGCCTATCTTCATGAGGGGACTCTGATAGGTACATTACCAGGAACTCTTTGACATCGTAGAAGCCTTTGGACAAGCTGAGCGCGATCCATCACACACTTTGACTGGCTGGGGGGCTAGTCCCTGCGAGTATCCAATCCGGACACACTTGCATGTTTCACATGCGTGTGTTGGGGACTGTGTTTAAGCCTCCTAGCTTCAGACCGTGGATCTCGGGCTGCCTGTTGTTCTGGCTCAGGCTGATACCTGACAATGGCTCAAGTCGCGATTCCCCGCGGCGTCCCTTCTGCGATTCTGAGGAAAGACCGTCTAAACCTTCCGGTCATGAAGGACCTGAATAGGGGCGCTGATATCCGAACTAATACCCCATTTTGTCCAACTAAAAGTCGGGGCCGAACGAGTTTCGGTCGGTTTTCTGGAAAACCAAGGGTATGGTATTCTCATCAACGGCTCTCAACCCAGATCAGTGCTTCTGGCTGGTCGACGAACATTCCATGGCTAGGGGACTGATCGTCGAGCGTTACAAGAGCTTGATAAGTAGTATATAGCCCAAGACCTTCGGTACATGCATGCCTGGTGATTGCAGGAGTTCTAGTGTTCGTGGTTAATCTGCGATTACGAAGGACCGTTAGAAACAGCGTCGTCATCGTCTCGGACAGTTGGACTTTCCCACTACTATAGCCTTGCTGCCCCGCCACTTGTACGCGCTTACTGATAGTCTTGTGTGACCGTCTGGCTTATTTCGTTCAGTAACTCGAGAAGGACAGTGACTTCTAACACTAACGATCACTTATCCCCCCGACGGTCGGTACTAAAAATATTCACGAGCGTTTGTTGGTGATCTCACAGTACCAAAGCTGGATTGATATAGGCTATGGCTAAGCGGTTGGATCCGTCGGTGACTTGCAACGGGCTCCTCTACGGCAACGGTATCTGATACCTAATAGTGTAGTGCTACACCATAACAGAGCGGTATGCTAAATGTGATTCACTACCGGAGACGTTCCGGCCTTCCAACCACACGTCTGGCGCACGTACGAAATACCTAGCTCTTTCCCCTTTATCATATAGAGCCTCTCTTAGCGGACTGTGACCCGTGTCCAGACAAACATGGAATTCTCGTATTCGCTATCGTCTAGAACTGCGTGACTAAACGTCGCTATAGTTGTACCGATCTGGCTGGACTCAGCAAGAGAATACTCGCGCAAAAGGTGACAGGCTCGCACGAGTAACACTCACGCTTCTATGCGACGAAGTTGTTCGAACCCTCGTGCGGGACTAAGCGCAATATGCGTGCGTGAACGAACTAGGATGCCGTTGGGTTAAGCCCGGTACACTCATGGGAGGTCCTTGAACAACTTATCGGACCAGGGTGCTGAATCAATGCTATCCTCGTTATGATCTTTCCCTCACGCGTGTATCCTCATAAACCGTTCACGTTCACTCGACAATCGCTGGCTTGCGACGTGTTTGTGCGCTATGTTTGGTTACTCGTAAGTCGAATTGATCCGTCAGTCAGTACCAAATCAAAGTCATTCTCAGGTCTTACCCGTTGCAGCGTGTAAATCTACCCGGGCGAAACATGCGGGTCCTATACTTGCGTTATAGTATATTTTTAGCAGTTTCCACCCTCATCGGTAATCTAGGGGGGATCGCATTCCCGCAGAACTCTATCGGATGGTAGGCCCAAGCAATGAGTGAGTCACGCCGCGCATCGCTACATTAAAGAACAGGAATGCGATTAGATCTAGGCCTAAGTATTCTGGCACTTACGAAAGCGGTACAGAGAAAGGTCGCATGCTTGCTGGATCGGTGTGATTAATCGTTACACCTGTCCACATCCTGCTAATCCGACACCCGGTAAGGCTTTCCATATATGAGTAATTGTGTGAAACGTGCGCAAAAGTATGCATCGAAGTTTGGTTGCAAACGTCACTGGAAGGGGCCTGCGCCCAACTCGTCGCAGGGAATAGGGGTGATCGATCGAGCGGCACTGGCCATGCCTTGAGCCCAAGCGGGAAGTTGGATCACAAAGTGAGAGGCTTCCCGATGTAAAGATATGTCCCTGCACAAGAGGGGGGACGGACCGGATAAGCTCGGGTTCCGGTGTGGCGAGACATTGATTTGTGAGTGCATATAAATGGCTACGAATTCGCAACTCGGTGCCGCCGCTCTCGAAACTTAGGACAGAAGATATCGCTTTGAAATGATTAGTTCGCAGCAGAGTGTTGGGTGATGTGTCCCTTGTATGGGTTTTGCGGTCGGGCAACCCGGCGACGTACATTGCATTTGTTATTTGGTATTCCCGATCAGTTTAACGTTGTCAAATGTTCCAGAACCGTTATGGGCGTGTATATGGGATAGTACTGTTGGTGTCTCGCTAAACTTCGCACTTATTCGATTCTGGAAAGCCCGCTCAGTCGGACGGAATTGACAGAGACCCAGACAACCCTTGCGGCCAACAGCCTGGCCTGCCTCCTATTAGTGACAAGTAAATGTTGCCAGAACTTGCGGCGTTTTACCGCAAGCGGTTCCTATAAGTACCGCACGTACGTTGGTCCGTCTCGGCTCCAAAGAGACGCTACACAAGGTACCATCACAAATTGTGAAGTCACTAAAATTTCCTGGCCATTGCAAAGCGCGGAACTACCGTTCCCCACAGCAGACGCAATCGGGTCTTTAAGCATTGTTCGTACGCCGTCATATCTCCTCGCGGGCAACTGCGCAAATACGAATCTTTCGCCATGCTTTTATTGGGAGAGTTAATAAATGGGCGTCTGTTTTGCCGTTGAATGGTTTATATTGGCGGGCCCGTCCGGTTCTATGGTCCGTGCTATGAATTACCCGTGATAGATAGTCGGCTGGGGGCAGGAACGACACACCCCCACTAATCCAAAGTTAGTGCATTCTGGCAGGTCAATGAGCTCGATTCTTTATTGTACGCGGTAGGTGGTGCCTCAACCAAATCGCTTGTTACATGCGTGTACCCCTGCCCCTACAAGGATAGTGATTGTTGTAATAATTACCGGTGGGAAGCAGTGGTACACTTCCCAATCTTTACTGAAAGCCTGGTTTACTCTCATTCGTGACTGATCGCTTGGCCCCGATCTATACCGAGGTATTATAAGGTATAGGGGCCGCACATTTATAGCCCCGTGGTAAGAAAATTGTCACTCCTACCCGACACGTTCCTACCACCCTTCACTGTCTTTGCCCGATTTTACAAGGGGTAGCACATACACCCCGCGTCCGTTCTGATCAGACCATTGCCATCCCCCACCGCTCATGTCTTACGTTAACCACTCTGTGCCTGTTATCTTTAATTTATCTGGCGTCAGGTGTAGTCCCAGAGACTAATGATGTTACCCATACTTATGATTCCCAATCATGTGTAATGAATATCGCTGAAATTAGAATCCCGATTCGCAACTAATCCCAGCGAGGCTCACGCGTGGATGAAACTAGCGGTACCGGGCGGCCGGTATTCGGACGACTTAGCAAGGTTATTTGAGAGTTCGGCATGCTCAACCTCACGGTACTGGTTAATGGATGTGTGGGTGGCTTCTTACTATTTAGCGACTGGGAGTATTCATAACTTATCCTCCCGCCAGGCAGTCGGAGGCCGCCCCTGTGAGACCTCCGCCCACCTTTCTGATTTGGAAAGTTACAAACACGTGCTGATAAGCCGATGTTCCCCGATGGCCTATCTTGTTACGGAAATGCCCTCTATCGGATTGCAAATAGGCTTGCGTCAGAGATGTGGAACGTTAATACGAAAAAGCATTCAGTCGGTGAGCGTGTGCGCCTCTGTTACACATGTTGTCTTCCGGTGATCGGAACTGACACGTTCATTAACGACGAGAGACACGAAGATATACGTATACTGTACAGATCGGATGGAATAGTGTTAGGATCAATTACAAAGGGGTCAATGAAGGTGAGGCACGCAACGTTTGATCTTATGTCGGAAATCGTGTTGAGTAGTCATGCTCTTTCTCGGTCATGCCCTAGCACAGACGCTGAGGTTCCAAGGCAGACGTCAGCTCTCACGATCGCGCTTCGCCCTGTGCGTGCTAGTCACCGCTTGCTAAGGCCCCTAGGGCCCTGTGATTCGCTGCGATCCCTAATCCCCACCCGTAACTCAAGGCTTGACGGCGGAACGGTTCCCCTAATGTACGATGGTAATGCGGGGACCCAATAACCATACGCCTCCGATCGTGGACCAACACAATCATTGCCTGAAGCGAGGATAGGATTAGATACCTGGAACGGAGCAATTTTGAGAGGCGTATGCGGTGGATTGTTTGGGCTTTTGTCTATCACTGACGGAATCATATTGCATCACAATACCTGACGCCTTTCGGGGTTCACTGGTGGGCAAGATCTACCACAAGTCCAGCATTACTCAGCGAGTGCTCCCTAGGCGTCATTCACGAAGCTGTAGGGGTCCCTACCCTCCCTTCTTGAGTTGGGTGGCCGCCTCTGTAGATGGTCTCACCACAGCTATTAATAATGATAAGGCACAATTGTGACCAGCGTTGCGCACTTTGTAAATTAGGGAACAAGATATGGGGTGTGGGCATTATGATTAGAGGGCCTGAGATCGATTCAGTCGCTGCATTTAATCGGATGCATCTTGCTTAGGGCTGTTACTTACTGTATATTGTCACGCGCACCATTCCGAGCCATCAATGGACATCATGCCCTGTAAAAAGGTTGGCCATTTTAGAATGGCCCAAGCCATCTTGGAGACCTTACAGCTCCCCAAGCTTCAAGCAAATGGTTCGGACCATAGAATGTGACCCAAACGAGTATTTTGCAATCCTCTCACGCAATGATCAATTTATATAATCTTTGAGCTCATGGGCGCTCTCAGGTGAAGGGATCGCCTGCTACTGTAGCTTAATGATGCCTAGGTTGAGCACACAGCGTCGTTATTGTGCTATGGACACGAAAAATGCAGATCGGCCAAAGGGAGACATAGTCTTACTTTAATATGCCTACGGTGATCTCCCGGCGAAAAGCATTAATCAGATGGCGATACAGAGGTCTCAAACATGGGGAATCCAATGAAGCTGGTTAATGCTAAGGGGCACATCTATCGATCCCTGGTCATACCGGGATCTTACTAAACGGGGACGAACTTATGATCATCGAATCATAATGACTATTACAGAGGCTCGTTAGTTCATGCGGGCTACTGATGTCTGATATTATTAACCGGACCCAATCAGGCAAGTGCGGTCCTCTGACTCCGTATGGCTCCTTACGCGAGGACTCAAATATACAACGCTGCGTACTGCTGTGTGGAGGCTCAACTTTCTCGGTGTTCAACATCTGCGAGTCGTCGGCACCCTCCAATACAGGGGTAGGGGACTGACTTGCTTGGTAAGTCAAGGCCAACTCGGTTACCAACAGAGAGGTGTTAACTACTGCGTTCGAATCGTTGATATCGAGTGTGACTCTAACACGCGTGATCGTATGCAGATTTCCATAGCCGGTTACATATTCATCCCGTACGAGCGAGTGGACCAGATATTGACATTCGCGGGCGCGCCGTAAGGTCCTCTTGCCGGAGCGAATATGTTTCCGTACAGGAACCGTCCTTGAATGTCAGTCTATTTGGGCGAACGTGAGCGATTTTCTGGCGGATTATCGGATCAAGTATAAGCGAAGGGGTCCCCGTAGCTGTGCCATACGAACGGGTGTGCCTCGAAACTGCAGCCCCCTACACCTTATCGTATGATCAATTAACTCTGGGCAGAGGCTGCTTGACACGGAGAAGACACGCTAAATGGACGTTCAGAAACAGGGACGGCTCAACGCGTCATCGGGAGAGAGGTGGGGTGAAGATGTGTCATACTCCATGCATCTGCGAGGCCTATCGGCGCTAAGCGGGAAGTCGTCGACGCGCTAGACAGATTTCACTTTAATGAAGTTCACCGCTCAGCAATTAGCCGGACAGTGCCCGTCCAGATAAACTTAACTTTGGGAGGAGAAGAAATAACACAACGTAACGTACGTCGCTTCGACTCACGGTCGTTATCTGATAAAAAGGGCGGGATGCGCAGTTTGCTAGATGTTGGTACTCTCTAAGCCTGGTACACCATTTGAAAGACTTACACTTACAATTAACCGGGATTAGGTAGGATACTCGCAGTACGCTTACCATACTCTGGATACGGGGAAGGAATTCGGTTTCTCTGTTTTTCTTACACTTGGGACCTGGCGTATCCATCTGCGCAACGTTTTGGCGAAGTATATCCAGGCACTAGCGTGAGAAGATATCGGCGCTTAATCCCGGCTGTCGGATATTTTGCAATTGGTTTGGCATCATCGTCCTAGTATCTAGTCAAGTTGTCGTTAAAGCAAGCACGCGAATAAGGAGGCGAATACCAAATGTTCGGATTAATAGAGCCCGATAAACTTCTTCGTCCGTTGGCGGTCCCAGGTTTGCCCGGGAACCAATAAACGTATTTCTAAGACTCTAGTAAGTCCGCTCTCCCATTCTTTCTGAATAGTACTGATCGGGCCTAGGCGGGTCGTAGCGCCTGCTTAAACTCTCCGAGCTCGTCCACGTTCATTTAAACGAATACCCCGTAACACCATTCTGCACATCTAGCACTCGTTGGAAAGTTAGTCCAATCCTTCTACCTGGCTATCCTTCTCATCTTCACTGCTGACACTGCGTGGACCAAATATTTGTTATCCAACTTCTGTGACCGTGATGGCTTCTGTTAGGAGGATAGCATGTTCACGGAGCAATAGATCTTATAGCTCTTCTAGAGTTCGGCTATTCTCCCCGTGAGTCACCAGACAGCTCCCA +8 TTGACTTTCCATATGTCACAGATCAACTGACCACCTAGGGTCAAAGTTTCTCCGGGGGATGTACATGCATTTGTCACACTCTTGACAAAGCGGATTTGATCGCCTTGCTGCAATCGACACTGTAGGTGACATATTAGCAGGCTGGGTCCCTATGCCAGACCCGTCGTAACTAAAAGACCGAATTCGGGAACTTCTCGCCGGAGTCCCTCATGACCCGTTTGAATATTTGCAGAAGCACCCTCTTTGTTTGACGAAGTACATATTAAGAATGTTGTAAGATGCATCCAAAGGACCTAGGCTGAAGACCACACTATATCTAGTACTAGTTCTCACCTGCCCAACGAACAGTGTCATACCTATCGGTTGCTCTAAGTTACACAAACATCTTGTGATTACGGGAATAGAGCTGAGACCGTGAGCAGGGAGAAGCGGATCGCATGGAACCAGGCGACGCCGCCCAGCGGCGCAAAACAAAGCCGTTGGCGTTACAGCTATCCTTAAGTTTTCTCCCTCCTACGGCTCTAGTGTACTGGTTGGATTAGCTGATTCTACCTTGGAGGATTAGGGTGAGTTACGGAATCTAACTAGAGCGTCCCGACGGTCCAATCTTACATCGTGGAAGTAGTTAACGCTGTTCGTATTGCGATGCAAGTCGCCCGCATAGGACTACGGTGCCGTCTTCCCAAAAACGGCAAAACAGGTGTCAAGTATTCACTGACCTAACCTGGAGGGGAAACACGGGATCCTCGCTCGGCCGGGGGCCCACTGCTTCCCACGCACGGAAAAATCTGCCCCTGGTACTCACTTATCATGACAGCCCCAGAGACTCCCAGTTGGGATCCTTAATGCCAGCGCAATACTGTGCCGAATTCCAGTCTCGCGATTGTATGGCGCAAGTCCCTTGGGATTTTTCCAATCCAGTCGTACCGAGGCGCTCCTTGTATCCACTCCATCGCAGATGCAAGTCGTCTGGACGATACTAAAGAACAAAGATAGGCTCTACGAATTTGTATTGGCACCGCCGACCGGATGGTGAGATCGTTAGAAGATACGCTGGAGCGAAAAGTTCGTCACGTCGCTAGCGAAGTGTACCGGAGCTTGGACACCAGTGGTGAATTTGCTCCGCGCGCAGAAGACCATCTGACACGGTAACTAAAAACTACGACCCCGAAATAGCCAGAATAGCTAGCATGAGTAGAATGGACGAACAGGAGGGATTCGCTGAGCGCGGTGACCGAAAGTTCGTGTACTTGGTAATGCGTTCTTAAATCCGGCATAATGAGACGTAGCACTAGTGGGTACTTAACCTTGTACGGCGCCGACGATTCACCTGACGATCGGATCGGCTTGCCTTCTGTATTGCGGATGTAGTTTGTCATATTAGATACAATGTTCTACATCAGCCTGAAAGCCACAAGCGGAATTCATTAGTCTGGCCTGAAGGCGTTTGAACTTAACTACACAATTCCTGGGCCCGGAGAGGTGCAAGCGTTCTTACAGAACACTACTTTGGAGCGGAATCGGTGTTGCCATAGCAGAATACGGAAATCTATGGACTTCTAATAGAATTCCAGTATGATGACCGGACCCGGTTCTTCAGGTAATGAGACGTGTCGGCAAAGTGTAAGCGTTTATGTATATGTCAAGCCACCGATGGACCAGAAGGAAAGCGAGCTCTAGGCTGAACTCACTATCAGATGACAGATCGTTAATTAACGAATGAGGTGGTGGTCGGATCATTGGTGTTTGAAACACGTCCTACTTACAAGCAGCTCCTAAAACAGTATCCGTATGCTTTAACAGTTCGTGCCGGAGTGCAGCTGACATGGAGCCCCCTTAAACAGTAAGGATACTTGGTGGACTAAGTGGCTTGAGGGTAACTGGACCCGACACCGTCTTCCTGGTTGCGACAGAGAAAGCAGACCTCAAGCGTCGCCAGTAACGAGATAGGAGATTCCAAGCTCCGAGTGCGGGAAGCAATCACTATCGCTAACTAGAGCAACAATTAGACGTATCCGAATACGGGCGGTTGGAGCCGAGCTATCTAGACTTACGAGCCAAAGTCTCTGAAGTTATTAGCTAGGGTACGTACTCTTGGGTGGCATCCGTAATAATCTTGCAGTCATTACGCCCAGAATTTACCCATGTAGATACCTTGCAAACACTGCGACCGCTGCACAACTCTGCATGAGGAGACCTAGATACGGGTACGCGCATCTTTGGCTAATAAGCTAACTACGAGATTACTTACTAATGCGACCGATGAGTAACGAAGGGTTGAGATTGACGCCCGAGGGTTTAGTAACGCCGGTCGCCAGCTGTCATACTCGTGGCCTCGTTTGCATACATGTCTAAACGCCTAGGGGGTCGCCATATGACACCAGCAGGGCACAGTGTCCTGGCCCGCTTTCGTGAATAAGATGGAATCTCTCGATTCTGACCGAAGGTCGTCAACTATAAGCCATCAATCATTGGGGAGATTTAAAATTACCGATCGGCACAGAGTGGGGTTACTTAGTACCGATGCTGCTGCCTCGGGCAGAGTGCCTCGTTGTTACATCACTCGGACATCGCACCGCAGCCATGTGTGGGCCGCTCTCTCATAAAAAGAGGCCTAGTGCACCCCGCTGCCCAATGGATAACGTACTCGAGTTCCGGGGTATTCGTCTGCGTTGACACCCGCCGCCCCTTGTGTTCGCCCATAGCCTGTCATCATCCTCTCACAGAGGGACCTGAGAACATTTCGCCTTTGAGCATAGCAACTGACGAAATGAACTAATCATTTAGCCGTATCGACCTCGTGTGGCTACTCCAGATTTTGAGCCGTCGATAAATCAGCGAGGTATGCCGGAACCGATCGGGATGTCTATAGTCAATGCTCATGTCTATCTACGAAGATTAAAGGGAATGAGGGGTCAGCCAACACGTTCGTCAATTTAGGGGGCTTTACCTCCAATACCGCGACTCGTAGAACCAGGGACCGATTCTTCGTAATCTCACTCTCGCGCCGCCCTCCGGAGATGCTGTCGGTACATACGGGGTAATATCAGTTATCGACCCTTTTCCTGTGTAGTTGTATGCTCATACATAGAAACGCTTGAGCACATTGTAATAGTCAGATGTGCGCGAGCCCGCGCGCGCCTCAGCCACCATACATAAGTGTCCAGGCACAACCTAGCCTATCTTTATGAGGGGCGTCCTATAGGTACATTCTCAGGCACACTTTTGCATCAAAGCAGCGTTTCGACATAATGTGGGAGATCCATCACACACTTTGGGCGCTTGGGGGGTAACTCCCTCACCGTAGACAAACCGCACACCCTTGCGTGTGGCACGTACGCGTGTTGGGGCTTGTGATTGAGCTTCGTAGCTTCAGACCGAGAACCTCTGGCTTGGTGTTGTTCGTTCTTAGCGTGATAACTGAAAGAGGGTCAGGTCCCGACTATCAGCCGCGGGCCGGCGCCGATTTCGTGGGAAGTACGGCTAAAGCTTCGAGTGATAACTGAGCCGAAAAGGGCCGCTGGTAGCCCGATTTACACCCGATGTGCTCCGTCTGAAAGTCAGGGCCGCATGAGTTTCGGTCGGTTATCAGGAAAACCAATCCTATGGTATTCGCATCGCCAGATCTTAACGCCACAATGTACTTGAGGCGGGTAGACGAGCATTACATGGCTAGGGGACTCATCATCAAGAGCAACAAGAGCCTGACAAGTAGGCTCTAGCGTTAAACCGTCGATACATGCATGCCTAGTAGTTGAAGTAATACTAGCGTGCTTGGTTAGTCTTCGAGTTCGAAGAACCGTTAGAATCATTGTCATTATTGCGTCGGACAGTTGGAATTTGCGACTTCTATAGCGTTGCTCGCCCGCCACTTGGCCGCGCCGAACGCTAGTAATGTGGGACCGTCTGGCTTATTTCTTATGGTAATTCGAGAAGGACAGTGACCACGAACTCTCTAGATCGCTTATCCCCGCGACTAACAGTACTAAATATATGAACGAGCGTTGATTTACGGTCTCACAGTAGCAAGGGCAGATGGACGTATGATTTGGCTCGTCGGTGGGATCCGTCGTATACTTGCCACGGGCGGCTCTACCGCTACCTTATCTGCTCCCCAATTGTGTAGTAATGCACGATGAAGTGGCGTTATGCTAAACGCGATTCACTACCGGAGCGGTTACGGCCTTCTAACCACACTTCAGGCGCACCTACGAAATACCAAGCTCCTTCCCCTTTATAATACAGGGCCTATTGCAGCGTACTGTGGCCCGTGCCAAAACAAACATCCTTTTCTCCTATTTGCTATCGTCTCGAACAGCCTGAATAAACTTCGTTGTAGTTGTACCGCTGTTGGTTTAGTCGGCTAGAGAATACTCGATCTATCGGTTTCACACGCGCACGAGTCACACTCACGCTTCAATTCGTGGAAGTTGTGACAACCCCCATACAGTACCAAGCTCAATATGTGTGCGTAGACGGGCTTGGATGGCTTTCGATTAAGCAAGGTATCCTGGTGGACAGTGCAAGCTCTACATAGCGGATTAAGGTAATGCATCAATGCACACTTCGTTGTGACCTTTGCCTCAGCGGAGTATCCTGTTATACCTCGCACTTTCACTCGAAAGTTGCTGGGTGGCGACATGTCACATAGCTATATTCGGATACTTGTACGACGAATTCATCCGCCCGTCCGTACCAGATCGACGGAATTCTCAGTATTTACAGATAGCCATGTGTTATTCTAGCCGGCCGAAACATGCAGGTTCCATCCTTGCTTGACAGTATATTTTGAACAGATTCCACGATCATGGGTAATCTAGGGGGTATCGCATTGCCGCCGAACCCTATCGGGTGGGAGGCCGTAGCAATGTGTGAGTCTCGCCTCGCATTGCTTCATGAAAAAACCGGAAAAAGATCAGATATCGGCCTAAGTATTCGACCACCTACAAAAGCTGGGCACAGAACGGTCGGATGCTTGCTGGGTCGGTGTGATTAGCCAACAGATGGATGCTTCTCATGCTAATCCGAGACCCGATGAGGCTGTCGATATAGGACTATTGACGTGGGGGCATGCCAACAGGATGCATCTAGGTTTGGTTGCAAAGGACACGAGCAAGGGCCTGAGGCGGCCGCGTCGCATGCAAGAGGGGTGATGGATCGAACGGACCCGTCGATGCCTCGAGCCCATCAGGGAGGTTGGATCACCAAGTGATAGGCTTGGCGCTATAAAGATATAATACCGCGTAACAGGAGGGACGGACCCCAAAAACTCGACTCCCGGTGTCGCGTCTCATTATTTTCGGAGTGGATTTAACTGGCGACGAATTAGCAGCCAGGTGGCGCGCCTATAGAGCATTAAGACAGGAGATATCGGCTTATAACGGTTAGTTCCGGTCTGCGGGTCGTCTGGTGTGAGCCATATATAAGTTTATCGGTCGATCAACCCGGCGGCGTACGTAGCATTTGTTGCTTGGTATCCCCTTAATCTTCGACGTAGCGAAATGTCCCATAACCGCTGTGGGGCCTTATTTCGTAGAGTACTATTGGTACCTCTTTACACTTCGGACTAATTGTTTTCTCAAACGGCACCCAAGTCGGACGGATTTAACGTAGACGCAGACACCCACAGCTGCCGTCCTTCTGGCCTGCCTTCTATTAGCGGTAAGCAACCGTTCGCATCACTTGTGGCGTTTTCCGGGATGCGTTACTTATAGGTACCCCACGTACAATGGGCCGTCTTGGGTCCGTAGAGACGCAACACAAGGTACCTTCAGAATTTGTGAAGGCACTAGAATTGCATGGCCTTTGCAGAGCGGACAACCTCCGTTCTCCACAGCAAACGCGGTCGGGTTTTTAAGTGTTCTTCGGATATCGTCATATCTCCCCTCTGGCAACTGGGAGTATACGGCTCTTTCGCAATGCCCTTTTTCTGACATTTAATTAATGAGTATCTGTTTTAGCCTTCAATGGTTTATATTGGCCGCCCCGTCCGGCTCTATGGTCTGCCATATCAATTACCCGTTATAGAAAGTTGGCTAGGCACCGGAACGACACAACCCCGCTTCTACCAAATTAGTGGCGCCTGGGAGGTCACTGAGTCTCGGTCTTTATTGTGGGCCGTAAATGGTGCCTCAACCAAATCCCTTGATACATGTGGGTACCTGGGTCCATTGAGGGATAGTGAATGTTGTATTATTTCACGTTGGGAAGTAGTGGTACGCTTATAAATCTTCACTGGTGGCACGGTATAATATAATACGGGACGGCTCGGTAGTGCCTGCTCTATACGGAGCTTTTAAGAGGAATCGGGGCCAAACTTTTATATCTGCGTGGTAAGTTAATTATCGCTCCTACCCGAGACGATCCTAGCCACCAGCGGTGTCGTTGCCCGATTCTACTAGGGGTAGCATATAAATCCAGCGTCTGTTCGGATTTGACGCATGCAATTCCCCGCCGCACACGTCCTTCCTTAACCTTTCTGTGCCTATTATCCTTAAATCAACTGTCGCTTAGTGTATTCGCAGAAACAAAACAGATTTCCTTACCTTATGATTCTCAATCATAAGTATTGAATATCTCTGGTGTTAGAATGACGATTCGCAGTGAATTAGAGCGGTAGTCCCCCGTGCAGGATACTTTCGTTACCGGGAGTACGATAACCAGACGACTTAGTAGGGTTATTTGAAAATACGGCCGGACCAAGCAATCGGTAAGAATTAAAGGATGTGTGAGAGACTTCTTCCTATTTAGGGAGTGGGAGTCTATATAATTTATCCTCCCGCCTTGCAAACGGCACCCGCGCCTGTCCAACTTCCGCCTTGCTCTGCGACTTGTAAGGATACTAACACGCGCCGTTCAACCTATCCTCCCGTATAGCCGATCGCGTTATGGAAATGACATCCCAGGGATTGCAAATAGGTTTGTCACAGCCCTCTGGTGCGATATTAAATTTGAGCAGTCAGTCAGGGAGCGGGTGCGCCCGTATTACACATGTTATCTTACGGTGATCGAAACTGACACATTCATCAACGATGAAAGATTCGAAGATGTCCGTATGCGCTACTGTTCGGATGAAATAGTGTTAGGATTACATACAAGGTGGTCATGGACGCTGATGCACGAAACTTTCACTCCGATGGCAGGGATGTTGTTGAGTCGTCACGCACGTTCTCAGTGAAGGCTTCGAAAACATCCTGAGGCTCCAATGCTTACGTGAGCTCTCTCGCTCACGCCTCGTCAAGTGCGTGCGAATAACAGCCTGTCTAAGCTCGTAGGGACCTGCGATGCCGGTCGATCCGTAACCGCCACCCGCAATACACAGCTTGACGGCGGAACGGCTACCCCTGGAGACCATGGTAATGCCGGGACACCGTAACCATATGCCTGCGACCCCAGACCGTCGCAGCTATTGACACAAGTTAGTAGAGGATTTGATCCATGGAACGGGCCATTTTTTAAAGGGGAACGTGGTCGATTGTGAGACCTTATCTCTAGCTCTGGCGTAATCCTACTCCCTAAAATTATGTGACTCTTGTCGGCGTTCGCAGGTGGGCAAGATCGACCTGAAGTCCTCGATTGCTCACCGACGGCACCATACGCGTCAGTCACGCTCCTGTAGCGGCGCGTATCCTCTGTTCTTGAATTAGTTAGACACCTTTGAAACTGGCCTATCCACCCCTATTAATAATGGTAACTCACACTCGTGACAAGCGTCGCGCAGTCAGACCAATAGGGAGCACGAGATGTCGGGTTAGGATTTAGACTAGTATCGTTGAGTTCTAAGCAGACGCAGCATTTAATCGGAGGCACCATGCTTCGCCCGGTTAATGTCTAAATGATGTCACGGGCGTCATACCGAGCCATCACTGGACTTTATGCCTAGTAAAAAGATTGGCCACTTTTAATGACCCCATGCGATTTTGGACGCTTTTCAGCTCCCCAGGCATTTGGCCAGTGGATCTGACAATAGAATGTTACCCAAAAGGGTATTTTGTAATACGCTCACGCAATAAGCAATTCACATCACAGTTGAGCTCATGCTTGCTCTGACTTGAAAGGATCGCCGGATATTGTATCCTAACGATACCTCGGCTGATCACGCTCTTTCTTCTTTCTGCTAGGGACACCAAAATTGCAGATCGGCCTAAGGCAGACAAGGACTTACTGGAATATGCCTACTGTGTTCGCGCGGGGAAAGGGGTCACTCAGATGGCGATAGTGCGGTTTCAAAAATGTAAAATCCAATGCAGTTTGTTAATGCAAAGTATCTCTCCTATTTCTCCCCTGTCATACCCCGATCATACCAACCGGAAACTAAGGTTTGACGCCATAATCAAGATGAGTATGGCTGCGCCCCGAAAGTTCATGGTCGCTAATAATGTCGGATATTATTAGCCGGACCAAATCTTAGAAGTGCGCTCCATTGACTCCGGATTACTCCTTACGAGGGGCCTCCAAAATAAAACGCTGTGCACTGTTGTTTGGAGGCTGACCTTACTGGGTGACCTCCGACTGCAACTCGGCCTCGACCTCCGATAGACGGGGCGAGTAATGTCCTGTTTCGTGACTCAATCAAAAATAGGTTAATGTAAACGTGGTGGCAACTACTGAGTCCGAATCGATAACCTCGCGTGAGAATCAGCCAAGTGTTCTCGTATGCAGTTTTGTAGAGCCTACCACATAATCCTCAAGTATCGGTAAGAGGAACAGATTTTGGCATACGCAGGCGCGCGTTATGGGACTCGTGGTCTAGGGAATGTGCTCCCGTATAGGAACTCTCCTTGAATGCCGGTGCAATTGGGTGAACGAGACCGATTTGGTGGCAGATTTTAGTTTAAAGTTTAAGCAAAGGGTTCCCCGAGGCCGTGTCATAAGAACAGGTGTAGAACGAGTCCCCGGGCCCCCACCCCTTATAGTCAGATCCACTAACTGTGGTCAAAACCTGGTTACCACCCAGCACACACGAAAAGGGGACGTTTAGAAATTGGTTCGGCTCAGCCCGTCAGCGGTAGAGACGCGGGGTGAAGATGTGTCATACTCCATCCATCTGCGAGCCGATTCGGTGTGAAGGGGTGCGTAGACGACGCGCGAACCTAATTGGAGAAGAATAACGTTCACCGCGCACCGATTACGCTGAGACTGTCCGTCGCGATCTACTTAAGTTGCGGAGGAGAATAAATAACATAGAGTAACGTACTTCGCTCGGCTTCAGGGTCGGTAACTGATAAACAGGGCGGGATGCGGTGAGCTCTAAAAGTTGGTCGGTACGAAGCATCGTACACCTTTTGAAAGATTTACACTATCCATAAACTGGGTTTTGGTAGGACACTCGTTGTACGGCTACCCCACTTTGGCTAGAGGGAACGAATTCGGCTCCTCGGTGCTTCTTACGCTACGGATCAGAGGTCTCCATCAGCACGACGTTTTGACGCACTAGATGCGTGCGCTAAGGCGAGAAGACAGCGACACTTGACCACGGCTACCCGATCTTTTGTAATTGGGATGGCATCATCGTCCTAGTAAAGAGTCGAGTTGACGTGAAACCAAGGCCGCTAATAAGTAGGCGAATACCAAATGATCGGATGAATTCAGTCGGAAAGACTTCTTCGTGTGTTTGCAGTCCGAGGTATGCCCGGGAATAAGAGAACGTATGCCTTAGTCTCCAGTAAGGCTGCTCGCCCAATATTTCTGAACAGGACCGATCGAGCCTGGGCGGGTCGTAACGCCCTTTTTAACTCTGCTAGCGGCTTCTCGTACAAATAAACGGCTCCCGCGTCAGACTATTCCGGACATGATGCACTCCTAGGACAGTAAGTACCTCCCTGCTAAGTGACTATCATTCTCATGTTCACTGCTAACTCTACGTGGGCCAAAGTACTGTTATCCGACTACTGTGACCCTGACGGCCTCTGTTAGTATGATTGCACGCTCAAAGAGCACTCTAACTTAGCGCTCTGCGGGACTCCGGTTACAGTCTCCATAAGCCTACAGGCAGTCAGAA +9 TTGACGTTACACATCTCCAAGACTAACGGATCTCCTCGTCTGAAACGAACTCTGGGTGAGGTAAATCATTCAGAAAAAGGCCTCATACAGCAGGTTTGAAGTGCTCGCTCCAGCAGACACTGTGTTTGAGATAGGAGCAGTCGGGTTGCCTAATCAAGACTCCTCGTAACTAAACCCACAAATTGGGGAGAATCTTTCGGTAGTTCCAGCCGCCATTACTGAATATGTGCAGAAGAAACTTCTGAATTTTCAGAAGTACTCGTTAAAAGCGATTGTTTGTGCATCCTAACGACTTTGCCTGATGACTTCGTCAGATCCATGCCCAGTTTTCACCTGACGACCGGACTGTGTGAAACAATGCGGTCGGTCTACGGTACAAAACTATCTTTAGATTTTGAGTAAAGGGCTGAGACGGAGAGCGGGGAGTGGCGGACCGCATTTTATCAGACAACCCCGACGAGCCGCGCTAACCTAAGCAGATGGTGTTAGAGATGTCAGTAAGTTGTCATGATTCGCCTGCACAAGAGTACTGGTTGAATTAGCGAATTGTACATTGGACGATAGGTATGATTGGCAGAATCTGACGCGAACGGTCTGAGCGGGCAAGTCTCCAACTTGGAAGTGGTTAGCACGTCACTTGGCTCGATGCAAGGCGGGATCCTAGGCGTACGGTGCGATCTGCCCAAAACCGGCAAGACTCGTGTCCTGTAATTAGCGACCTATCCTCGTGGGGAAAGATTGCATGCACGTAAGACTGGCAGCCCCCTCCGGTACCCGTACCGACGGATATTCGCGGGGCGCAACGTTAATAGTACACCCTTAAATACTCGCTGTCGGGTTCACCAATGCTAGTGGAGTACAGTTTTAGCTTCTGGTCTTGCGAAGGTAAAGCGAATGTATCGTGGGGTTGATCCAATCCCCTCGTACCAATACGCTTCTTGTTTCCACTCCTACGCAGAGGCAACTCTTCCGGACGACACTAAAGTATCAAAATAGGCTCTAAGACTTTCTACTGGCACTGCCGACCGTTTGGCGAAGTCGTCAGGGTCTACACCGGGTCGAAAAGTTCGGTACGTGGATAGCGAAGGTACCCCATCCTTGGACACTAGTTGAGGTTTTGCTTCCCGTCTAGCTGACTCCCTGACACGGGAACTCAAAACGACCTCCTCGAAATTGCTAGTATAGATAGTTTGCTCAGAAAAGACGAACAGGCGCACCACTCTGAGCGCTGAGACTGGAAGTTCGAGCCTTTGGTCGTACGTGCTTAAATCGCGCAAAATGAGCCGTAGAACTAGTGAGTATTTCACCCGGCACACGACAGGCGACTCCCCTTAGCGGTGGTTCGGCCTGCCTTCCGTTGTCCGAAGATTCACTCACCTGTGTGATACATTGATCTCAGTCAGGTTGGAAGTAAGAAGGGGAACTTATCATGCTGGCCTGAAGACATTTGAACTCCACGTAACAAGTACTGGGTCCGCAGAGGTGAATGAGTGCTTAGAGGGCTACTCATTCGGGCGGGATCTATGTTACTAGCGCCGGATACTGATACGTCTGAGCGTCTAAAAGAATGCCAGTATGTTGACCGGGCCCGCTGTTTCAGGTGATGACACGTTGCAGATAAGTGTATGCGTCCAAGTATGAGTCAAGCTAGCGACGGGACAGAAAGAAAGACAGCCTTTGCGTGAACTAAGGTTAAATAGACAGATCCCTAAGTAGGGTTTTAAGTGGTGGGCGGGTAATAGGAGTTGGGAACATGTCCTTCATACAAGAAGCTCCTTAACCAGGGAGGGTCTCCGACCACAGTAGGTGCAGCACATCTGCTGAAATGTAGCACCCTTATGCAGTAAGGTTACTTTGTGGATTTTCTTCCGTCTGGGTTACTGGACCCGACACCGTCTTCCCGTTCTCGACAGCTAAAGCAGACATGAACTGTTCCTTGTAAGGAGATAGTAAGTTCCAAGCCCCCCGTGCGGGAAGGAATAACTGACGCTAACCCGATCAATAATTTGACGAATACAACAAGGTGAGGGTGCCAAAGCAATAACTGCACTTACTATCCACAGTCTCGCACGTTATTGGCTGCTGTCCGTATTCATGGGTTGTGATCGTAGTAACCTTTCCTGCTTTACGTCGAAAATTTACCCATGGAGGGAAATTGCAGACGCAGACACTCCAACATACCCCTCACTGTCGGCTCCTAGAAATGGGTACGCGCGTTCAAGGATAATCAGGTAACTGCTAGATTACCAGTCACTGCGCCTGATGCTTAACCAAGGTTTGAGATTGCCCCCCGGGTCTTTAAAAACGCCGGTCGCCATCTATCACCCTCGCCGCCACGTGAGCATACATTTCTAAACGGCTATGTGGTCAGAAGGTGGCATATGCTGGACACACTGCGCAGGCGAGCTTTCTTCAATACGATGGAAACGCCATATGAAAAACGAAGGCCGTACAGTATAGACCATCAGTCCTTGCTCACATTTAAAATTATAGATCCCCAGAGAAAGCGATAACTAAGCACACATCCTTCCGCTACGCGCAGATCGGATCGCAGTTAGATCGCTCTGACCCCGCAGCGCCGAGAGCCTCGGCCCGTTTGCTCTTAAGAGCCGGCAAAGTGCACACCGTTGCGCCATGGAAAATGGAATGGAATTCAGGGAAAGTTGTCTTCGTTTACAGCCACAGCCACTTGTGCGCGAGAAGGGGAATTGTTCCTTCTATGACACAGGCAACTGAGTACATTGCGCCTCTGAGCATTGTAACTCACGTAGTGCCCTCATCAATAAGCCGAATCTACCTAGTGTTAGTCCTCCGGAATTTGAGCCATGGGTAATTTAGCGCGATATGCCGAAGCCAAGCAGGGTGTGTATAGTCAAGGCTCAGGACTAACCACGAAGACCATTGGGAGTGATGGGTGATCCACGACGTTCTCAACGTTGGGGTGCATAACCTCCTACCCCGCGACGCGGGGTATCAGTGAACGAAAATTCGTAAGCTGACTTACCCTCGCCCGTCCTGTGATCTTCTCGATCCGTATAAGGAAGTCTGAGTTATGGACCCTTTGCTATTGTTGTTGTATGTTCATACATTCCAATTCTAGTGGTCAGCGTACTAGAGTGATATGCGCGTTCCCTAGGGCGGCTCAGATACCATACGGAAGTGTGCAAGAACGACTCAGGCTCTCTTCATGAGAGGCGTCCGACAGGTGAGACCCCAGGAACACTTTTACATGAAAGAAGCTTTAGGACCTACTGTGCGGGATCCATTTCACAGTTTGTGAGGCCGGTGGGCAAGTCCCTGTTAGTATAGAATCCCATCGGTCTTGCATGTGTCACGTACGGCTGGTGGCGACTGTGAGCGCGCCTCGTAGCTTACTACAGAGCACTTCGGGCTTGCTGTTGTTCTATTCCAACCTGACAGCTGACAAATTCTCGGGTGGCGAGTCCTCGCCGAGGGCCGGCAGCGCTTACGTGGCACGGGCGGGTAAAGCTTCCGGTGATGAGTGACCGGAACAGCGCCGCTGATTTCCGAACTTTTACCCGATCTTGGCCGTCTTAAATTCAGGTCCGGATGAGATTAAGTCGTGTTTCAGAAAAACCTACCCAATGGATATCACATCAACAGAACTTACCCCGACTCTGTAATTGATGCACGTCGACGAAATTTACGTGAGTAGGGAATACATCATCCAAAGTTGCAAGAGCGTGATGAGCCGGGTATAGCCTTCAGCCGGCGGTACATGCGTGTTTTGCGGTTGAAGGACTAGAAGTGTGCGTGGTTTCTCTGGGATTACGAAGGACGCTTCGAATCCTAGTCATTATTGCAGATGACAGTAGGACTCCACCACGTGTATAGCTTTGTTCGACACCAACTGGTACGCGACGTAAGTTAGGCACGTTGAACCGTCTGGGTACGTGCGTTTGATAACTAGGGATGGTCAAGGACGGCTAAGCCTAACGAGCCCTTATCCCCCCGACGATCGATTCCAACTATATGCACGAGCGTATCTTTGCGATCTAGCAGTAACAAGGCTATAAGGAAGTATGATATGGCTGGGCGATTGGATCCGTCTGAGACTTGGAACGGGCTCGCTGACGGCGACCCTATCAGATCCCTAATTGTCCAATGGTTCACGATATCAGGGTGATACGCTATACGCGATTCACTACGCGTAACGTTCCGTCCTCCGAACCGCACGTCAGGCGCACGTAACAAATTCCTAGGTCGTTAGCCTTTATAATGTCGGGCATATTGTCGCGGCCTGTTATGTGCGCCGATACAAAGATGGATTGCTCGGGTGTGCTTTCGTCGCGAACTGGATGCATAGACGTCGCTATAGTTCAGCGGCTTTTGGTCGAGTCGGCGAACGAGTAATCGCTCCAACGGACTTACACGCGTACGCGTCACACTCGCGCGTCAATGTCAGGGAGTCGATATCACCACCGTGCGCCACAATCAGCAATAAGCGTGCGTTCACGAACTAGGTTCGATATGTATTAAGACCTGTACCTTAGTCGATGATCTAAGTACAAATGCGCGGACCAGGGTAACGCATAAATGTACTCATCGTTATGACGTTTGCATGAGGGGTGTATCCTATGACTCCGGGTTATTAAACCCGATAGTCGCGGGGAGGCGGCTCGTTGTCAAGCGATTTTCCGATACTTGTACGTCTAATTGCTTCCTCCGTACGTACCAACACAAAGAAATTCTTGCGCCCTACCCTTGGCCGCCTGTTAATCTTCCCGGACAAAACATGCGCGTCCCATTCGTGCATGACATTATATGTGGAGCAGTTTCCACGAACCTCGATGATCTAGGGGCTATCGCCGTACCTGTGGAAGCCATCGGATGTGAGGCCGTGGCAATGTGTCAGTCACGCCCCTCCTTGCTACATTCTAAGACAGGAACGCGATCAGATATGGGTCTAAGTATTAAAGGAGCTAAGAACACAGTGCACAGAACGGTCTCATGCTGGCTGGATGGTTATGACTACCCCACAGACGTGGGCAAATCCTACAAAGCCGAAACCCAATAAGGTTGTCGATTTAAGAGTATCTTCGTCAAATCTTGGCAATAGGATGCATCTAGGTTTGGTGGCAAACGACTTTGGGCGGGTCCTACCGCTCACGCGTCGCTTGGGATCCGGGTGATCGATCATACGGCCCTGTCCTTGCTCTAAGCGCATCTGGGAAAATGAATCATAAAGGGACAGATATCGAGATATAAAGATATGACACAGCACAAGAGGGGAGAAGGACCCGATAATCACGTCGCCCGGTATTGCGGATCATTACTTTTGCAGTGGATTTAAAGGCCTACGAGTTCACAACCCGGTCGCGTCGCTGTCCAGTATAACGGCTGAAGAGATCGGCTTGAGATTGTTGGCGTCGATCTACAAGTAGGCTTATGTGTCGCGTAGATGGGTTTGGGTGTCGATCTACCGAGTGGCGTAGCTAGCATTTCCTGCTTGGTATACCCAAACAGCTTAACGTTGTGGAATGTACGATTACCGATTTGGGCCCGCATATCCCATAGTACTATTTGTGTCCCGTTACACTTCGTGTTGATTCTGCTCTGATTCCGGCGCTTAGTTGGAAGGAATTAAGAGTGACCGTCAATCCCCACATGGCGTACCTACACGCCTGCCTCCTTTTTGAGATAAGCAAATGTTCCCTGTACGTACGGCGTTTTCCGGCATGGGTTAAGGATATGTAGCGCACGTGCAGTGGGCTGTCTTGGCGCCAGAAAGACGAAACGCATGGCGGCATCAGACTCTGTGCATCCACAAGTATTTATCGCCCTGTACAAAGCGCACTCCCTCCACTCACCACAGCGGACGCGATCGCATCTTTAAGCGGTCTTTGGTTACCATTATATCTCATCGGTGGGAACTGCGAGAATGCGAAGCTTACACCATGCTCTTATTCTGAGTTTTAATTTATGGGCAGCTGATTTCACGTTCAATCGTATATATTGGGGGTCCCGTCGGGTTCTATGGTCTCTCATCTTAATTGCACGTTATACATAGTCAGCTAGGGACCGGAAGGACACAACCCACGGTTTAAGGACGTAGTGCGGCCCGGGAGGTCAGTGAGTCCGATTCTTTATTGTGTGCTGTAAACGGTGCAACAACCTTATCCCTTGTTAAATGTGCGCACCTGGGTCCCTTGGAGTATAGTGACTGTTCTACTAAGTCCCCACGGGAAGGAGTTGTACACTTAACACTCCTCCGTGGAAGCCTAGTCTCATCGCTTACGTGACTGCTCGGTAGACCACGACGTGTACTGAGGTATTATATGGCCTCGGGGTCGTCCATTTATATGCACGTTTTAAGGCAATTGGGGGTCCAGGACGACTCGCTCCTACCAACGAGTTGTGTCATTCCAAGATTTTGTCAGGGGTAGTACATACTACCCGCGTCCGTTCTGATTAGACCGATGCTATTCCCCGTCACATAGGCCGTACCTAAACCCCTCTGTGCATTTCAACTTCAAATCAAATCTAGTTTGGCGTATTCGCAGAAACAAATGATGATCTCCAACCTTGTGGTTCCCAATCATGGTTAATGCACATACCTGACGTTAGAATGACAATTCGCAATTACTTACAGCGCTGCTCCAACGCGCTGCTAGCATTCGTTACCGGGCGGTCGAAATTCAGACGACTTAGAAGGGTTATTTGGTGGTCCGGCGCGAAGAAAATATCGGTATGGGGTAAGGGATGTGTGGGTGACTACTTACTATAGAAGGATTGAGAATATTAATAACTTATGCTCCCTCCTGGCAAACGACAGCCGCGCCTGTCCCACATCCTGCGCGCTCTTGTTATTATAAAATTAAAAACACGCGTCGGGATACCCAGCTTCCGTTTTCGCCAAACATTGTATGGTCATGAGCTCTCTGGGATTGAAAAAAGGTTTTCCACAGGGATCTGCTCCGTTATTACGATATAGCATTCAGTCCGTGAGTGAGTGATCCCGTGTGACACACCATGTCATCCTGTAAACGAAAACGACACATTCATCCACGACGACCGCCTCGAAGATATGCTCATACGCTCTAGTTAGGGTGTAAAAGAGTGAGGAATACTTAGAAAGGGGTCATGTACGGTGAAGCACGCAACGATCACTCCAATGTCGGGGATGTTGTAAAGTCGGCACGCCCTTTCTCTGTCCAGCCCGAGCAATCAACCTGTGGATCGAAGAAAGCCGTCAGCCATCTTGTTCACGCTTTGTCCAGGGTGTGCTAATCAGAGCCTCAGTAACCTGCTAGGGCCGTACGATTCGCTGCGATACGGACCCGACCGCCACAACTCACAACTTGACGGAGGCACGTCTGCACAGGGGAACGCTGGGAATGACGGGAACCATTGTCCATATGCTGCCGACCGCCCATAATCACAGACATTGCCAAAAGTGAGGATCGGATTAGATCCCTGGAACGTGGCAATTATGCGACGGTACGGCGGTAGAACGTGTGACGTTCTTTCTTGCGCTATCGGAATCCTATTCCCTAACATTAAATGATTCCTGTCGGAGTTCACAGGTGGTCAAGACCGACCTCGGGACCAACTTAACTTAGCGAGGGCGCGATATGCGTCATTCAAGCAGGTGTAGGGGTCAGGATCCTCCGGACATGTGCTGGCGTGCCGCCCTCTAAGATGCTGTTTCCACAGCTTTTATTTATGGTAGGGCACAATTCTCACATCCGTCCCGCAGTTTGTCAATTTGGGATGCAGTGATGGCGTGTGGGCATGATTAGTAGAACCCTTAAGTGTGATGCAGTCGCGGCAGTTAATGCGAGAGTCCATGCCTAGCCACGCTCCTCACTATATTGTGTCTCGCGCGCCATACGTGGTCATCAATCAACTTCATTCCTTGGAATAAGGTGTGCCATTTTTAAATGCCCCTGGCCTTTGTGGACACCTGTCAGCTCCCCTGGCATCAAGCAAATGGTTCTGTCAATATAATGGATCCCCACAGTGTATGTTACTATAGTGACACGCACTAAGCAATTCACCTCACCGTTGAGCTTATGCGCCCACTCAATCGCAAGGCTCGACTCCGATTGCATCTCAACGATACTTAGTCTGAGCTCCCAGGTTCGGTATTTTGCTAGGGATAACAAAAGTGCAGATCGCACTAGGGGAGACATGGTCACACTGGAATCAGGGTACCTTATTCTCCCGGGGAAATGCGCCGATAAGATGGCGATCCTGCTGTCTCAGACATGGCGAACCCGTTGCAGCAGATTAATGCTAAGTATCACATCGATGAATGCCCCCTCTTCTACCGGTCATCCCAAAGCGAGGCGAACTGTTGCTTACCCCGGCAATCTCAATAAAGCTGGCCCCCGATATTTCATTGTCCCTAATTATGACGTGTATTATTTACCGGACCCAATGAGTCAAGTCGGGTCCGCGGACTCCGGAATACTTCTTACCAGAGACGGTAAATATACAAAGCTGCGTACTGTTAGTTGGAGTCTGAAACTCCTGGGTATCCTATCCCTGAAACTCCTGCTCAATCCCCAACAGACGTGTCGACTAATGTCTTGTGTTCTACGTCAAACACAAATAGGTTACCAAAAAGCTGATCTTAACTACGGCCTTAGAATTGATGATACCAAGCGTAATTCAACGATGGGGTCGCTTATACTGCTTTGTATACCCTAGTTGAGATGTCTATCTTAAGAATGCGTTGAACCTAAATTGGCATCCTCAGGCGCGCGGTAAGGGTGTCGTGTACGCGCGCATATCTTGCCGTATAGTAACCCTATCTGAATAACCGTACAATTGCGCGATCGCCAGCTATTTTGTAGCAGACTTTCTTAGAGCGGTATAGCGAAGCCGTACGCCGCGATGTTTCACAAGAACAGGTATGGATCGAGTCCCCGGCCCCCGACCCCCCGTCATATAATGCGTTTGCTGTGGACATTACTTGATTACCACCCTGTACACGGGAAAAGTGGATGCTAAGAACTTGGTACGACTCAAAGCGTCATATCTAGGTATGCGGCGAGAAGACGATTCGTACTCCCTGCAACTGCGGGGCGAATGGGCGGTATGGGGTGAGTCGTCGACGCGCTAACCATAATGGAGTTTAGTGACAGGCACCGTCCCGCAAATCCGCTGACAGTGTTCGCCACGATAAACATAAGTCGCGGGGGCGAAGAAATAAATCAAATTAACATAGTTCGCTCCGATTCTCGGTGGTCATCCGATAAAATGGGCGGCCCAAAAAAAGGTATGGAAGTTTCTCCGCGCTACTTATCGGACAGTATTTGACAGATTGACACTTCCCAGCTTCTCGGGTTTGGTAGGATACTCGTCGTACGCATACCCAACTGTGCATCCCGGGTATGAATATAGGTCAGCTATGTCACTTAGTATTCGGACCACGCGTCGCCCTCGTCACAAAGTTTTGACGTAGCATATCGGGGCGCTACGGCGAGCAGACTTCGGCGCTTAACCACGGCAGCCTGATGTTTTGTACGGGCGATTGCACAGTCGTCCTAATAGCGCGTCAAGTCGACGCTAAAACAAGTACACTAACAGGTACGTGAAGACGGAATGTTCTGATGAATTAAGCCTGATAAACATTATCGTTTCTTTCGAGTCCCAGTTTTCCCCTTGAAGCAGAGAACGTGTATGTGAGTCTCTAGTAAGTCCCCTCGTCCAATATTTCGGAGCAGTTGCGGTCGCGCGTAGGCGGGTCGTAGCACCATCTTTAACCCACTAAGCCCGTCATCAAACAATTAAGCGAAGACCCCGTCAGACCCTTACGGTTATGAAACACTCCTTGGACATCAAGTACTTGCCAGCTAGCCGACTATCCTTCACATCTTCACTGTTTACTCTTGTTGGACAAAATATGAGTTAAGCAACTAATGTAACGCGATCGGCTCACGTAACAATGCTAGCAGGTTCACAGCGTAATAGAACTTACCGCTCTGCGAGTGTCCAACGGGAGTCTCCGTTAGCCTACAGGCCAGTACCA +10 ATTGCTCGAACCGCGCGAGTGACTCTTACGTCAGACTTCCTAATCCATACTAGCCTATTGAAAATTGCAGTGCGTTGAGCAAAAGGTCTCTACGAGGCCGTGTGTCCTATAGGGTAGGTTAAGCTAGACCTTCCGTCGCCGTTATCCTACATGATGTTCTTTAGAGAGGGATCCGGGTTTACATGGACATACTCAATATGCTATCGCGGCGTCTCAGGGCCGAGTTTTGCGAGAGAATAACACGTATCGCAGAGAAACGAAGTACTATTTTGCTGAGGAACGGCCACTGTGTGTTGTTGTCAACAAGGAGATTACGAACGGCATAGGCCAGTATATGTCAGCCAGTTGATAAATTGTCCTCTGCTGGCTGTATTGTAGCAGAGCACTAAAGAGCACGATCACATATGCCTGGGTCGGACCTTGGGCCATTTATGCGGGATTTCAGAACAATAGGCACGTCTACACGATGTCGGAGGTTTAAAGGGATACTTTAGGTCGCAGGGTCTTCCCGACTTAGATATTCCCGGATAGGCAGGCGTTACGCAATGCGTATATATGGGGTTATACACCTGTGGGCTGCATGCAGCTAACTAACCTAACCGTATTCAATAAGAGCATACGAAGTTGACGCCACAGCGGGCCGGAATTACGGCTAACTTTACGACTGTAACGCGATCGTCTCTGCGCGGGACCTCTGGCTTCCGCCAAGTGAAACGCTCCCCAGCCAATTTATCTCCCAGCCCTGACAGCCAGGCTCTTGTAGCCGGTCTACGCGTTCTTCCTCGAGGAACAGGCAGGAATGTACAAGCAAATTACACTTATATCATGCGGTTCGTCAAGCATGTGATCCGGACAGATTTGAACAAGAGCCGACTCTTACTTCCCTTTCAAACCGAGACTATGACTTCCAGATGAATGCCTGGTGGCCATAGTATTGGTCTCACTAGTCGTGGAAGGAGGGACACGTTGTGGAAGACCTTTTGGTAAGTAACACTATCATCGCATAGCAACCACCTCACTTACCTAAGTGGCATGCTGAGGCCAGCAGTGCCCCTAGCCTAAGTATCATTCGAGCGTCACTTTAACCCGAAATACACGACTACCCTAACTACTTTACTAGCTATATAATTGGTAGTCTTGGACTGTTATAGTGGGCACGAGAGGCCTAAATCAGGTACTGATTAAGAAGCAAGGTGCCCGCGCGTGCAATTGCGGTCCATAGGAAACGTCGGTTCGCCCGACCGGAAGTACCCGACGTCTTGGATAATTCTAGTCTGACCAATACGATAATTAGCCTCAGTGCCCTCTGGGACTTGAACGAGCTGCCGCCTTCACGGCCGGCGAGCCTTGGCTCCCAGGTAAATCATGACATCCTTGGGGTATTGCAGGACGGCTTCGCCCATGGCCCGGCTCGACCGGACCCCTTAACACTGGGACGACTTGACAGTGGCCGTCGAAACAACTACCTCATTGGGGATGATGACTCACGATGGTCTTCCAGACCCAGGTCTAGGAAAGGCTCTAGGTCAGAGAGATCCAGCCATTCTCTTCAAGAACCACTTACCCAACAGGCTTTCGAAGCACTACCGCTTTTCGGCCGATTGGGGCCACATGTGTGTCTTACGGACGCACGAAACCAGGGGGTATACCATTCTCGGGGCGATATGGTCGGACGGTTTGCGCGGCTTTACGGACCATAGAGCGCGCTTGCACGTACCTATTTAGGAATTCCGTGCAAGCTTAAAGAAGGTCGGTTTGTAAGCAGATAGCAGTAGATCGTGCTTCGTGAGACTTTTTGACAAAGTCATAGGCATTGGGCTCTGCCAAAACCTAGTGGCACTCTTGGCCGAACCAGGGGCCGCGATTCTCAGGGCATCGCTTCGATGAATGATGGGATTTGCTATACCGGCAACCACGTAAAGTGTGTAGGTCACAGGAAGAGCCAACAGTGAGCCGGCGTCTAAGTTTAGACTGGTAAAGGACGCCAGATCCTAGTGCTGCAACGCGATTCTTGCGTGGCTCTGTTAGTGCCTCGAAAGGCGTAACTGACATTGTCGATCACGGGTCGCGCCGGTGTGACGATCTTAGCCGACTTCTAAGAACGCAGGTCTGCCCATACAGACTGTCAAATGTTCTAGAGTGTACGGGAATTAGGCACTAGGCTAAACTTTAAGCAACGACCCGTGGGTGAGCATCGGCCTGGAGAACCCCCGTCAGCAAGGGCGTCGGAGATATGGGATGAAGGCTATGCAACACTCCAGGATAGCAGCGTACGCTCCGTGTTTGAAAGTATTGCAAGTAGCACAAAGACCTCATACAGCTTGTGACGCATCGTACGGGGTGGCCTGCGCGGGGGAGTGCGATTTACCACGTCATTGTCTGCCCAGGTCAGTAGTCCCCTTAATTTTACCGGGGGGACGCACAACAATTGCCGCATAACGAAACATCTCGGCATGAGTAGGGATAAGCCTCAACCTAGTGTAAGCACAGCGCCCCGAGCCTCGAGGAAGTGATTTTCTTTGTTTGGCACATATAACAGTACTAGCAGGCTATGCGTGTACATTTGTCCATCCTGTCCGTCCAAGGAGATGAATCCATTCACTCTTTTTCCGTATTGCCCCCACAACTGTTCTTAGATTCAAATGGAGCTTAGCGTAAGATAGTCGAACATGTATGTCCAAAGTCTTGGGAGGCCTGCGCAGCGTTGTGACAATACCGATGGCCACTAAGGTGTTACACATTACTGGCTATAAGGATATAAATCCGGTCGACATAAGACTTACGTTCAGTTGTCTCTTTATGGTGGGCCGTCGTTGGCGCCATTGAATCTTGAATTACCATTGTGACTTATCGGGACTCGTATGGGTATGGATTCGCCAGAGCCCGTATCGCGTGGTATAGGATAGTCAAGAGGCAAAATTTTTTGCACTTGTGTTAGCGTTCATCTGTGCGTCCCGGAATCTATGGAATTGACTACCGAGTAAAGTGGAGTAACGCCGGGAAAAGGCCGTAGAAATCCCACGCTAGGCCAAAACTAACCCGGAACAAACGGCATCCTTCATATTAGCAACTTGTCTAGTATAATGGAGGCTTTCAGATAAAGCCAATCTTCTGAGTATCGTGACCCCCCGATAGGCTGGTCGTGTCAGGGTCTAGGCGCCGTCACATTGCCGCTGTGAGGCTCGCCACTCGGTTCAATTTTAAAATGTGCTAAGCCTCTAACGACCGGGGCCACGAGCTTTGCTAGTTCACCTATGGGTGTTTAGGCGACGCCAGCCGCGGATTTCTGACAATAAAGGTTATCAACTTAACAGGAGGTGACGATCAGATGCACTCGCAGGTCCTGTCTGAGGTGAGAGGTCTTACGTACATAATGAGTCGTCTAAACGACGAGGTAGCATGGCAATTAAGCATTAGCGGAGGCGCGGTGGGCTTGCTACAAAACACGAGTTCGGGGGCGAGGCGTACCTGTTCTAAATCCGCAGTGAACTATGACATGTCACGCTGCCCTGGTCAAGTGTACCGAGTGACTTAGCCTCGCTTCGTACCTTTTAGTGTTTCATCAACTTCACAGGGCCTGATCCGTGACTTACCCCAAAGGGCCCTGGCCTTTATCCACATTGCGAGACATGCCGTTGCGGCCAACTAATATCTACCTTTATTAAATTGGGGGGCTGTTCCTTAAAGAAGATTCGAGTTGCCTCAGACTCCTCGGACGTGCTCGAACGTTCAAGTCATGGTGGAACCCGCCCTGGAAGACAGGTCCAAAATCATTTGCTGGTACAATTATGAGCGGTCAATGTGCTAGAATTAACGGGTTAACCGTTGTTCTGTACTGTGAATTGCATAGCGATCTGGGTAGCTAGTATTTCTGCACCCCCATCCAATCGCGGTGTGAAAGATCCACGGATGATACTCTCGCGGCCTCGCTACCCCTCAGAAATGAGCCCCGCTTAGGGTAATGAACCAACAATATTCACTGGGGAAGCTCATGGGGACAGGGGTACAAGGCACCTGGCTGAGCAATTACCCAGGATTGAAGCGCGGTCCTAGTTGGAGGAAGACCGCACCCGGGTAGCAGTCAGTCACCCTATCCTCCGGAAGAGGATGGGGCGTAACCTTATCTCTACAGTTCGCCGCAATCACTTGTCAAGCAGGTAGCCATTTAGTACCATCGTGCCATCATCGTGATCTGCGTCTGGGGCTGCGCGTGCTGCCCGTGGTCATAAGAAGTGAGTCCTTCCTCTTGAACTTCCGCGGATGTGCCCCAGAGGTAGGAGACACATACAACATTGCTCTCGACACAATGTGATTGTTAAACGACCACTGAGACCCCATTTCTGTGTTTGCCGTTTACAACCCAGTCAATTTTAATACCTTAGACGGCTTGTATTATTCCGACAGCATAGTGTGTTATGCGTATGCTTATACAGAAGACGCAACGTGGTGGAGGTTACACTATAGCCAGCCCCCAGCATGGGGGGCCGTTATATGGCTTCAGTTAACAACAAAGTTACTGTGCCTCCAGGGGACTAGTTCTCCGACTTGAGATATGATACACCTACCCCAACCATGAATAACACACTCATAGGCCAGAAGGCTGCTTACCGGCAGATAGTGGTAAATTGATCGATGATGAGTGTGGGATTATTAACTGACAATTGCTAGGCGCCGCTGCTTCAAGGATCCGCGAGGTTGCACGTCCGTAGGGTTGCTAGATGGAGCCGTGATTACAAGGGAAAATGTGACCGCGCCCATCCTAGACCAGGATCTGGAGGGTAACCGACCTACCCAGAAGACGTATGACCTCACATCCCCTTGATCTGTGCACAACTTGGTTGCAGGTATGTTCATCTCCGGGGATTATTGCCATCAGGCTACGCGGTATTTCCGAAGACGCGTTGAAGATACAGTGCATGACATGCCACCAAGCCTTTGGAGGCATCAGTGGCCATCGATTCTATGGTTTAACTGACGCGGTCAATGCAGACATTAGTGAAGGCGTTCTGGCTGGCCGTTAATATCGGTTCGGGGATTGTCGTCCTCGTTTTGCGGGTTTGACTCCAAATGGTATAGAAATCGGTCACGGCAGTTTCTAAAAATTACTCGGAGCATTTTTGTTTGCTGTGAACGCGGGCCTTGGGGGATGGCCGAGTCGTAAGGGACACACCCCGAATACCTAAGTGAAAGCGTGTGTCCCTATTAATGGACTAGCGCATGGGTCGACTCCCAACCGAAGTAGGACTCCAAAAACGCGATATTAGGCGGTCTCGTCAGGACTTCATGGCGAGATGCTGAGCCGTATCCTATTTTGTACTCTTTGCGGATTAAGGAAAGACCCCGTTTCATAGATAACTAGTCGGATCAAAAGAGATTCTTTGCGTCAGGCGTAAATGAAGCATAATCCAATATTTTGCTCTTCTTGCCATTCCTGCCTTCTCGATCGCATCCCTGTCCAATGAATCTCCCCACTCCATTGCCGACTTTCGGTCACGGTCTCCCGCCGTAAGTCGTTAGAACACATTGTGAGCAGTAACGTAGTCTTGTCCGTGAGACGTAAAATACTGATAATACGTTGGTTGGGAGGACTGTTCGGGTCCAGCTACAAGGGCCTTCAGCCCGTTTTCGATGGCTTCCGATGTATGTTACAGTGACCGACATATCCTCGATTGTATTGCGGGACACGTACATCAAACAGTAGACTTTTAACATAAAGACACAAAGCTATAACCGTGTTTTGACGGTACATGGACACTCACCAGCGATCTTCGAGGCGTAGCATGGCATTCATGGAGGACGCTCTACATGTATGATTGGGCGCGTCCAAAAGTGGATCGCATAAAATCTCCGGGTCGAGGGTTTGGGGGGTACTCTCTATAACAAGGAGATTTAAAATACGCGTGACCATACTGCAGTACGGCCCAAAAGGAAATGCGTTCCCGGGTACAGCTTTTTGTTGCGCCGGTTTGATGATATTGTGGCAGATAATCCACCTCGGGGGTAGGCTGTTCTTGGATGCTGTATGAGATGTCATGGCCGCATACCACCGGAGCAACGTGGCGAGTACTCGACCTAGGCCTTCCCACCTGAGAGCATGAGCTGCTGCGATATTCGAGTTGTCTCAGTGATTTACTTATCTGATTCGCGGCCCCACTGACGGCCAGACCACCTCCGCCCCCTGGAATAAAACGCGTCCGGGAGAGACTCCACTCATGCAATGAATTGGTTTCAAAGTGCAGCCCAGTGCGTCCATTGGGACGTTGGTTGCATGCACCGGAATGATGAATACCCCGGTAAAGGCGTGGCGTTCTATGTCTTAGTTTGCTCCGGAGATCTGCAACATTAGTTCCTACCTACATACAGACTATTGCATCTGTAGCAGGAGAACACGGGTCTTCGCGGACCATTTTTGTCAATGTTCAACAACTTAGAACTAAAGTTTGGGCCTGCCATATCCGCCCCTAGTTGCGTGGTTTTATGTCTATACGGATCCCAACCCGTGCTTAAATAGGTTATATGAAGGCTATCGCTTTAATAACGTGTTAGGACGATCCCGCCTCGCAGTTCGAAATGCCTGCTCTCCCATTCAATAATTGCCTTAAGAAACGTGGTTGCACTGGTCTCATCAGACTACTGGACTGAGTACTGATGCAGACCGGCTACGTTTAGACGGTCTCGCCATGTACGCAGTGGCGTGATTGAATACCCCCCCGACGATATCTGATCGCACCCGAACACCCAAGTGAAGCAAAGCACACGGGTCAGTACCATCCTGCAACTCCGTAAATAAGGGTTATCGGGGCCGCGTACGGCCCCCATGATTTATTCTCGATTAGTTGAAAGTTTCTCTTATGTCCGGGCCTCGAGTGTCCTCAGTACAGCAAAGGCACTTATCGGCTTTCTCCTTCAGTTCAAGTCAAGCTTTATCACTCAGGATCCGTGTTTCATATGTCATGTGGACAGTTCCAAACTGTGCGGCCTATGTCGGAGGTCAGGCGGTTTATACCGTGATATATCCTATCTCCGCTTGTATCAGCGTGCTTATTGACCTCAGTTTCGTACGAATCAAAGTTTCACGGTAATTACCTTCCGAATTTGGGCCAGATGATTCTTTGCTCCCACGACTGCATTGTTCCCGTGGACTTCTGAGATGTGACGTTCCAATCTTCAACGATACGGGGGGATATAAATGGCTAGTTCCTCAATAAGCTACCATTCAGGAATGTGTGTGTTAACGGTTCCCATCGGGATCCAGTCGAAGCGACCCCTCGTGGTGGATTGACATCGAACTGTAGCCTACAATGCAACCTGCTCGGCAGGGCCCGGGGTGCCAGCAATACGAGAACGTCTCCGGAAGTGGTCCGGGTACGTTTGCACCGGATAATCTTGATTCGCCGCAGCTGGCATATCGCAAGCTATCTGGCCCCGAGAGAGCCAAGCACCTATGTCGGTCATGAGTAGGCTGGACCTAGGCTAGTGTACTTGGGCAGTTGCACCGGTATTGCGGCTGGCTCGACATAACGCCGACTCCGCGATATCTGGTCAATCTAGTCATAGCGCCATTGTTCCTCGAGCTATGGCACTGCAATAGCAAAGTCACATCATGACGCTTCCGGCTGTTTTAGTTTCAGGTGTTATGTGGGGCTTCAAGTTGCGTGCGAGATAGATGAGGGGGTACCAGAAGCAAGTCAAACTACGGCAGGCGTGATGCTCAGAATTTGAGTGTGCAGTAGAGTAATATGGCCCGCACCGTTTCCTCCATGAGGTTCGTTATGGCTTACACTGTGTAAATTATAAACGAGCACCGGTTTTTGCTTGGCGGTTCTGCAATCTAAACCCTGCCTTGGAAAGTGGTTTAGTAGGGATGTGATTTCACGACTTCATAGTAGGTTACTCTCAGGTCTTACAACACTTGGCGGGCGGCTACAAGAACCTGTACCACCGTGATGGAGAGCCGCACCCATGTATTACACATTAGCAGGCCACTTTATGGCGAGTAGATGGTTCATACGTAGGGCGGACTGTTTCCGCCGTCTGCTCAGGAACCGGACATTGATTTTTACGTCTCCCCGCACAGGGGCGGGCAACTTAAGGGTTGCTAAGAAGAAAACACACGGGCGCAACGGGCAATACGTCCACTAGACGGCGATCGCGACTCGGACATGAGGAACCCGGGTAAGCCCATCTCAGTTAACTATAGGGTTGGAGCCTGAGGGGGTTAAGCGCATTCGGGTTCGTTTTCGTGGCAGTACTCCCGCAATACAACCTTTGGCAGCACAAGTCACGTTAAAAAAGACGGTCGAGGACCCAAGTAGTTAAAGTTTGGCGCCCTCTCGCCCTCTACAATGCTGCCGTTGGTAGCGTATTGCCCCACTTGTATGACACGCCATCTGCGCGTGTGGATGTCGGAGAGCTGAGAGCGCCTTTCTTGGGATGCGTACTAGAGCAGATGGAGCTAAAGCGCATGAAGTATACTAGTGTTCAGCGCACTCATCGCACCCGCCAAGAACTATCTCGTTATGGCACTGTACATAGGTGTACAGCTTTCCGTACAGAAGGGACAGGTGTAGACTTGGGTACGACGTCCTGCTGCTGGAGGACATACCCCCAGCGGGCCATCGGCTCTGTTAAAGCGAGCCAGTCACATCCAGAGGTCGATCCCATGAGCGTCTCCTGCGCTAGGTCCTCACTCTTAGAGACCTTCGGCAAGTAACTGAATCTAGCGGCAAGGCCCGTAGCTTTAGCGGACGAAAATGCAAAACCTGTCAATTATTACAATGACATGCCATAGACAATACTCGTGTCCCGAAGAGTACATCGGCGATGCGCCGAAGTTAAGCAAACCCCGCTGCTTAATCGGTTATCTCATACCAAGCCAAGATAAAGTGACGACCGATTTATAGCAACCAATCAGCAACCACGATACAACGAGAGGCAGTTGGAAAGCAGGCACCGCGTGTAAATTTTCGAATGCCCTGTAACACGAAAGGGTGTTCTCCTACATTCCTCCTGCCCCTCGGCGACAAACTTATCCCGACTATGCCAAGTCACACATCCAAAGGAGTTGTAACACAGTTATCCAGTCTGTTAGAGAGTCACCCCAGACATAGAAAGGCTCCGTCCGAAAATAAATGACGGGGAGTTATATCAATCACATCCTACCCGGGACTGGGTCTCGTCAGAGGTACACTCACGATTCACCCTTAGAATCGAACACTTAGGGGCCCAATACCGTACAGCTCGGTGATGTCTGGGTCAACATTACTCGTCGTCGCTATTTTTGTCCCGTTTCTCCCCTAAGGGTAGATAGCTTTATGGGGGCCTGAGCGATACCATGGACTGTGGACTCAGAGTTTCTATATCGCCATGCAAGACTCACGCTAGGTCATGCAATGCGCCCGCGAATAACATGCGAAGGTCCCTATTATAGTACCAGCACCTATCTGCAATGAGATGAATTCCGGAAGCGACAAACAGGTGCCCGGCTGCCGCGAAGGATTACGATGTCTCTAGTTTCCCGTCGGCCGTATTACACAGCTACCCTAACATTTCCCCTCCAGCCCAAGTTCGGGAGCTGCACCTTTTTTGACTTCATTCTCGCGTGAGCGAACATTCGAAGCAATTGCTATAAATTTTATATGGGTTCTGCGTTGTATCAAAGACTTTTACTTTTGAGTCTCTATATATCGTTAGTGCTCACCCAGAAAATACGGTAGCGTTATGTAGGAGTCAAGGGCAGTGGACTCGACCGATGTCCTAGGGTTTTCGACACAAGTACACGAATGGTGGGAGGACAACCAGTTATAGTGTGCTAAATTTTACTTCGCGAGTAAGAGTCGAATACACAGGCGGGCAAGCTCTAGGAAAGAAGTATTTGATGTAAAAACAGTTGAGTTTGCTGGACAGTACACGGTGGGGATCGACGCTCCCGCATACTCGCTTGGCACCCCTTCAGTATCTTCATCACTAAATCGGTGTCCGGGAATTAACAAAATAGTAG +; +end; + + diff --git a/JC+GAMMA/test3.py b/JC+GAMMA/test3.py new file mode 100644 index 0000000..6cded7e --- /dev/null +++ b/JC+GAMMA/test3.py @@ -0,0 +1,398 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### + + +import readSeq +import random +import re, os, itertools, sys, glob +from itertools import chain +from scipy.stats import gamma +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + def allocatePartial(self, patterns, rates): + if self.number > 0: + npatterns = len(patterns) +# print 'npat', npatterns + self.partial = [0.0]*(4*4*npatterns) +# print len(self.partial) + for i,pattern in enumerate(patterns.keys()): + base = pattern[self.number-1] + for l in range(4): + if base == 'A': + self.partial[i*16+l*4 + 0] = 1.0 + elif base == 'C': + self.partial[i*16+l*4 + 1] = 1.0 + elif base == 'G': + self.partial[i*16+l*4 + 2] = 1.0 + elif base == 'T': + self.partial[i*16+l*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: +# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] +# rt = [2.89442785] + +# rt = [1.0, 1.0, 1.0, 1.0] + + npatterns = len(patterns) +# print 'patterns=', patterns + self.partial = [0.0]*(4*4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): +# print i, pattern, patterns.keys() + m_list = [] + num_pattern = patterns[pattern] +# print num_pattern + + for l,m in enumerate(rates): + + + psame = (0.25+0.75*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + pAA = psame*(self.lchild.partial[i*16+l*4 + 0]) + pAC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pAG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pAT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pAA2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pAC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pAG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pAT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + self.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild + + + ###################################################### + + pCA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pCC = psame*(self.lchild.partial[i*16+l*4 + 1]) + pCG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pCT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pCA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pCC2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pCG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pCT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + self.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### + # + pGA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pGC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pGG = psame*(self.lchild.partial[i*16+l*4 + 2]) + pGT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pGA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pGC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pGG2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pGT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + self.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pTC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pTG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pTT = psame*(self.lchild.partial[i*16+l*4 + 3]) + + pTA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pTC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pTG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pTT2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + self.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild + + site_like = (sum(self.partial[i*16:i*16+16]))*0.25*0.25 + site_log_like = (log(site_like))*num_pattern + like_list.append(site_log_like) + log_like = sum(like_list) + return log_like + + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# print script_dir + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick +a = treenewick() + + +def gammaRates(alpha): + bounds = [0.0, 0.25, 0.50, 0.75, 1.] + rates = [] + for i in range(4): +# print i + lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) + upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) + mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. + rates.append(mean_rate) + return rates + +def prepareTree(postorder, patterns, rates): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(nd.allocatePartial(patterns, rates)) + print 'log-likelihood of the topology =', likelihood_lists[-1] + + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24553 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + alpha = 0.5 ### gamma shape parameter rate categories + yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' + rates_list = gammaRates(alpha) + postorder = readnewick(yuletree) + result = prepareTree(postorder, readSeq.patterns(), rates_list) \ No newline at end of file diff --git a/JC+GAMMA/tree.tre b/JC+GAMMA/tree.tre new file mode 100644 index 0000000..2290a90 --- /dev/null +++ b/JC+GAMMA/tree.tre @@ -0,0 +1 @@ +(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112) \ No newline at end of file diff --git a/JC+GAMMA/treeLike.py b/JC+GAMMA/treeLike.py new file mode 100644 index 0000000..c3007fa --- /dev/null +++ b/JC+GAMMA/treeLike.py @@ -0,0 +1,367 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### + + +import readSeq +import random +import re, os, itertools, sys, glob +from itertools import chain +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + +def allocatePartial(node, patterns): + if node.number > 0: + npatterns = len(patterns) + node.partial = [0.0]*(4*npatterns) + for i,pattern in enumerate(patterns.keys()): + base = pattern[node.number-1] + if base == 'A': + node.partial[i*4 + 0] = 1.0 + elif base == 'C': + node.partial[i*4 + 1] = 1.0 + elif base == 'G': + node.partial[i*4 + 2] = 1.0 + elif base == 'T': + node.partial[i*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: + npatterns = len(patterns) + node.partial = [0.0]*(4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): + psame = (0.25+0.75*exp(-4.0*(node.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*(node.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + + pAA = psame*(node.lchild.partial[i*4 + 0]) + pAC = pdiff*(node.lchild.partial[i*4 + 1]) + pAG = pdiff*(node.lchild.partial[i*4 + 2]) + pAT = pdiff*(node.lchild.partial[i*4 + 3]) + + pAA2 = psame2*(node.lchild.rsib.partial[i*4 + 0]) + pAC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pAG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pAT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + node.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild + + ###################################################### + + pCA = pdiff*(node.lchild.partial[i*4 + 0]) + pCC = psame*(node.lchild.partial[i*4 + 1]) + pCG = pdiff*(node.lchild.partial[i*4 + 2]) + pCT = pdiff*(node.lchild.partial[i*4 + 3]) + + pCA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pCC2 = psame2*(node.lchild.rsib.partial[i*4 + 1]) + pCG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pCT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + node.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### +# + pGA = pdiff*(node.lchild.partial[i*4 + 0]) + pGC = pdiff*(node.lchild.partial[i*4 + 1]) + pGG = psame*(node.lchild.partial[i*4 + 2]) + pGT = pdiff*(node.lchild.partial[i*4 + 3]) +# + pGA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pGC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pGG2 = psame2*(node.lchild.rsib.partial[i*4 + 2]) + pGT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) +# + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + node.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(node.lchild.partial[i*4 + 0]) + pTC = pdiff*(node.lchild.partial[i*4 + 1]) + pTG = pdiff*(node.lchild.partial[i*4 + 2]) + pTT = psame*(node.lchild.partial[i*4 + 3]) +# + pTA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pTC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pTG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pTT2 = psame2*(node.lchild.rsib.partial[i*4 + 3]) +# + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + node.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild + + ######################################################### + + site_log_like = (log((sum(node.partial[i*4:i*4+4]))*0.25))*num_pattern + like_list.append(site_log_like) + + log_Like = sum(like_list) + return log_Like + +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick + +def prepareTree(postorder, patterns): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(allocatePartial(nd, patterns)) + print 'log-likelihood of the topology =', likelihood_lists[-1] + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24553 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + +# yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' + + postorder = readnewick(treenewick()) + result = prepareTree(postorder, readSeq.patterns()) \ No newline at end of file diff --git a/brnlenMCMC.py b/brnlenMCMC.py deleted file mode 100644 index 87c6160..0000000 --- a/brnlenMCMC.py +++ /dev/null @@ -1,494 +0,0 @@ -########################################################################################## -# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model, -# and performs MCMC on branch length parameter -########################################################################################## - - -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from scipy.stats import gamma -from math import exp, log - - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -alpha = 0.5 #gamma shape parameter for rate categories -n_gen = 50000 -save_every = 50 -mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior -########################################################################################## - - - - -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - - - -def allocatePartial(node, patterns, rates): - if node.number > 0: - npatterns = len(patterns) - - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) -# print len(node.partial) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - for l in range(4): - if base == 'A': - - - node.partial[i*16+l*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*16+l*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*16+l*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*16+l*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: - - npatterns = len(patterns) - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) - - like_list = [] - for i,pattern in enumerate(patterns.keys()): - m_list = [] - num_pattern = patterns[pattern] - - for l,m in enumerate(rates): - - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild - - - ###################################################### - - pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) - - pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 - site_log_like = (log(site_like))*num_pattern - like_list.append(site_log_like) - log_like = sum(like_list) - return log_like - - - -def mcmcbrn(postorder, patterns, rates): - nodes = readnewick(treenewick()) - mcmc = 0 - output = os.path.join('brnlenMCMC_results.txt') - newf = open(output, 'w') - newf.write('%s\t'%('n_gen')) - newf.write( '%s\t%s\t'%('LnL','LnPr')) - for nl in postorder: - newf.write( 'node%s\t'%(nl.number)) - newf.write('\n') - start_log_prior = 0.0 - for nd in nodes: - start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - start_log_like = prepareTree(nodes, patterns, rates) - - - newf.write('%s\t'%(mcmc)) - print 'mcmc gen=', mcmc - print start_log_like, start_log_prior, - - newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior)) - for nl in postorder: - newf.write( '%.6f\t'%(nl.edgelen)) - print nl.edgelen, - print - print '**************************' - - newf.write('\n') -# print - for r in range(n_gen): - for i in range(len(postorder)): - preedgelen = nodes[i].edgelen - currentlike = prepareTree(nodes, patterns, rates) -# currentlike = 0.0 - currentprior = 0.0 - for nd in nodes: - currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - current_ln_posterior = currentlike + currentprior - - u = random.random() - m = exp(0.2*(u-0.5)) - nodes[i].edgelen = preedgelen*m - proposedprior = 0.0 - for nd in nodes: - proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - - proposedlike = prepareTree(nodes, patterns, rates) - proposed_ln_posterior = proposedlike + proposedprior - hastings_ratio = log(m) - logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio - u2 = random.random() - if log(u2) < logR: - nodes[i].edgelen = nodes[i].edgelen - log_prior = proposedprior - log_likelihood = proposedlike -# print 'log(u2) < logR so new proposal accepted..' - else: - nodes[i].edgelen = preedgelen - log_prior = currentprior - log_likelihood = currentlike - -# print 'log(u2) > logR so failed to accept new proposal..' - - if (r+1) % save_every == 0: - newf.write('%s\t'%(mcmc+1)) - print 'mcmc gen=', mcmc+1 - print log_likelihood,log_prior, - newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior)) - for j,k in enumerate(nodes): - newf.write( '%.6f\t'%(k.edgelen)) - print k.edgelen, - newf.write('\n') - print - print '**************************' - - newf.flush() - - mcmc+=1 - - -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) - with open(path, 'r') as content: - newick = content.read() - return newick -# - -def gammaRates(alpha): - bounds = [0.0, 0.25, 0.50, 0.75, 1.] - rates = [] - for i in range(4): -# print i - lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) - upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) - mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. - rates.append(mean_rate) - return rates - -def prepareTree(postorder, patterns, rates): - likelihood_lists = [] - for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns, rates)) -# print 'log-likelihood of the topology =', likelihood_lists[-1] - return likelihood_lists[-1] - -def joinRandomPair(node_list, next_node_number, is_deep_coalescence): - # pick first of two lineages to join and delete from node_list - i = random.randint(1, len(node_list)) - ndi = node_list[i-1] - del node_list[i-1] - - # pick second of two lineages to join and delete from node_list - j = random.randint(1, len(node_list)) - ndj = node_list[j-1] - del node_list[j-1] - - # join selected nodes and add ancestor to node_list - ancnd = node(next_node_number) - ancnd.deep = is_deep_coalescence - ancnd.lchild = ndi - ancnd.descendants = set() - ancnd.descendants |= ndi.descendants - ancnd.descendants |= ndj.descendants - ndi.rsib = ndj - ndi.par = ancnd - ndj.par = ancnd - node_list.append(ancnd) - - return node_list - - -def makeNewick(nd, brlen_scaler = 1.0, start = True): # - global _newick - global _TL - - if start: - _newick = '' - _TL = 0.0 - - if nd.lchild: - _newick += '(' - makeNewick(nd.lchild, brlen_scaler, False) - - else: - blen = nd.edgelen*brlen_scaler - _TL += blen - _newick += '%d:%.5f' % (nd.number, blen) - - if nd.rsib: - _newick += ',' - makeNewick(nd.rsib, brlen_scaler, False) - elif nd.par is not None: - blen = nd.par.edgelen*brlen_scaler - _TL += blen - _newick += '):%.3f' % blen - - return _newick, _TL - -def calcActualHeight(root): - h = 0.0 - nd = root - while nd.lchild: - nd = nd.lchild - h += nd.edgelen - return h - - -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - - i += 1 - - post = pre[:] - post.reverse() - return post - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): - if nd.lchild: - newickstring += '(' - - elif nd.rsib: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - return newickstring - -###################yule tree################################################### -# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree -# - num_species is the number of leaves (tips) in the tree -def calcPhi(num_species): - phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) - return phi - -# yuleTree creates a species tree in which edge lengths are measured in -# expected number of substitutions. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def yuleTree(num_species, mu_over_s): - # create num_species nodes numbered 1, 2, ..., num_species - nodes = [node(i+1) for i in range(num_species)] - - next_node_number = num_species + 1 - while len(nodes) > 1: - # choose a speciation time in generations - K = float(len(nodes)) - mean_epoch_length = mu_over_s/K - t = random.gammavariate(1.0, mean_epoch_length) - - # update each node's edgelen - for n in nodes: - n.edgelen += t # same as: n.edgelen = n.edgelen + t - - nodes = joinRandomPair(nodes, next_node_number, False) - next_node_number += 1 - - return nodes[0] - -# calcExpectedHeight returns the expected height of the species tree in terms of -# expected number of substitutions from the root to one tip. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def calcExpectedHeight(num_species, mu_over_s): - return mu_over_s*calcPhi(num_species) - - -if __name__ == '__main__': - random_seed = 348889 # 7632557, 12345 - number_of_species = 5 - mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species - random.seed(random_seed) - species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) -# print '#########' -# print species_tree_root - newick = makeNewick(species_tree_root) -# print 'Random number seed: %d' % random_seed -# print 'Simulating one tree:' -# print ' number of species = %d' % number_of_species -# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio -# print ' actual tree length =',newick[1] - expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) -# print ' expected height =',expected_height - actual_height = calcActualHeight(species_tree_root) -# print ' actual height =',actual_height - print 'true tree: ',newick[0] - print '**************************' - -# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' - rates_list = gammaRates(alpha) - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) -# try1 = readSeq.patterns() - result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list) diff --git a/tree.tre b/tree.tre deleted file mode 100644 index 2d84738..0000000 --- a/tree.tre +++ /dev/null @@ -1 +0,0 @@ -(5:1.8601,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896) \ No newline at end of file