def patterns():
    """Count the site patterns of a NEXUS alignment stored beside the script.

    Every ``*.nex`` file in the directory of the running script
    (``sys.argv[0]``) is read.  For each file the ``nchar=<int>`` value and
    the text between ``Matrix`` and the next ``;`` are extracted; every
    non-blank matrix line must consist of exactly two whitespace-separated
    fields, ``<taxon name> <sequence>``.  A site pattern is the string made
    of the characters found at one column, taken from the sequences in the
    order they appear in the matrix.

    NOTE(review): the indentation of the original code was lost in the
    reviewed view, so it is assumed that the ``return`` sits after the file
    loop; with several ``.nex`` files the patterns of the last file
    processed are returned -- confirm against the original layout.

    Returns:
        dict mapping each site pattern to the number of columns (among the
        first ``nchar`` columns) showing it.  Empty when no ``.nex`` file
        is found.

    Raises:
        AttributeError: if ``nchar=`` or the ``Matrix ... ;`` section is
            missing (the regex search yields ``None``).
        AssertionError: if a non-blank matrix line does not have exactly
            two fields.
        IndexError: if a sequence is shorter than ``nchar``.
    """
    import glob
    import os
    import re
    import sys
    from collections import Counter

    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))

    # Bound up front so that a directory without .nex files gives {}.
    pattern_dict = {}
    for filename in glob.glob(os.path.join(script_dir, '*.nex')):
        # Context manager: the original never closed the file handle.
        with open(filename, 'r') as handle:
            contents = handle.read()

        nchar = int(re.search(r'nchar\s*=\s*(\d+)', contents).group(1))
        # re.S lets '.' span newlines so the whole matrix is captured.
        matrix = re.search(r'Matrix\s+(.+?);', contents, re.S).group(1)

        sequences = []
        for line in matrix.strip().split('\n'):
            parts = line.split()
            if not parts:
                # Tolerate blank lines inside the matrix.
                continue
            if len(parts) != 2:
                # Explicit raise keeps the original exception type but is
                # not stripped when Python runs with -O.
                raise AssertionError(
                    'expected "<taxon> <sequence>" in %s, got: %r'
                    % (filename, line)
                )
            sequences.append(parts[1])

        # One pattern per column; dict() keeps the return type a plain dict.
        # The former sort(cmp=...) step was Python-2 only and its result
        # was never used, so it has been dropped.
        pattern_dict = dict(Counter(
            ''.join(seq[site] for seq in sequences)
            for site in range(nchar)
        ))

    return pattern_dict
b/JC+GAMMA/readSeq.pyc new file mode 100644 index 0000000..157ab98 Binary files /dev/null and b/JC+GAMMA/readSeq.pyc differ diff --git a/readtree.py b/JC+GAMMA/readtree.py similarity index 100% rename from readtree.py rename to JC+GAMMA/readtree.py diff --git a/JC+GAMMA/simulated.nex b/JC+GAMMA/simulated.nex new file mode 100644 index 0000000..af9f492 --- /dev/null +++ b/JC+GAMMA/simulated.nex @@ -0,0 +1,20 @@ +#nexus + +begin data; +dimensions ntax=10 nchar=10000; +format datatype=dna missing=? gap=-; +Matrix +1 CTTTCTGGGTGACGTCGCGCGAAGCTTTTGCCCTTAGCCCTGAAACATAGCGCGCAGCACATGCATGTCACCGGTTTGTCAACCCGCCAGTGCTACTCAATGGGTCATATCGGGCATGATCCGCCTCCCTTCCCGGCCATATTAACGGTCTTGAACCTCATTTTCGCGGTACGCGCGTCCACGTTGTGATTGACAAGCCTCCATGCGGGAGCACCTAACATACGTTAGCCTGATCCATGGGACGTAAACTGCACTACATCAGTTTTGTGGGTCTCAGTAGGCGGCTCTGACAGTAGATTCCCTGCAGTACAGATCTCTACGCAACACATAGAACTAGGTTGCTGGAAGTTGAAAACGTTTCGACTGCCTGTACAAAAAATTCGTCGGCAATACAAGTACCTGTAGTTGCCTTGATTGCTCTCAGGTCACATAGGCTTGATTTTAACCAGACTGACGAGTGGGGGCCTCGACGCTGCTATAAAGTGATACATAAGCTCGGAGCAACAAATCTTCAATCGTTTACGAGGGGGGGCCGGCGCAAGCGCGATCACTCTTCCTGTGCCGTATATAGCTCGGATAGGCCCAGGCGGCTAACTAATCCGGTTACAATCAGTATATAGGCAGGGCACGGCACGTGCGGCCCGTGAAACGAAAAATACCTCAATGCCATGGCACTCCTATCTTTGAGGCAACTCGCTGTATGCTATTTTGACAACTCCTCCACCCTAAAAACCTCTTCAACGTGAAACCGTTGCAATCGTAGAAGTTCTCTGACTGTGATGAGAATGCCCTGGTGCAACCGTTTGACTAAATCATACGGTGCTGAAACTGCACATGAACCTCTGGTATCGGAAAGGCGAGGAACACTCGGTATTCGCGCAGTGGAATCGTCCCATATCTATATAGTTAGGCTGAATAGAAAACGCGGATACTCTTTGTCCCACTGAGATTGGCCGGATGCAAGCGTACTCTTAAAGATTTACGCATCTAACTGTCTCTACCGGGGGATATCCCCACACTAACTTTGACAAGGACGTCTAGACCTAAGTAGGGATCTCCGAACTGTCCTTCCTCTGTCCGTCAATCCCGGCATCCACTCTAACGCCGACAAGCTTAAAATTGATATAATAGCGAACCCGGAATTAGTGTCGTCGCCTCGGGAGGCAGTTATAGATTAGTGATTAGCCACTATCGCGCCCGTTTGGACAAAAAAGACGCATAACGACCGCTGAGCGAGATAACATATGCTGCGCCCGACGCGCGAACGTGCCATAGCCTGCAACACTATCGTGGTAGTCCGTATCCCTTCCGTGATCCCCTGGGCCACTTACCCGACTACCTCCGATGTGTCCCCCCTCGTGCAAGACACCGTCCATAGGCACGTTCTGGATTGCTCCGTACGACCCGTAAAAATCAGTGGGACGTCCCTAGAGGCAGACATTCCGGGACACGGTTTCTAGAGCGAAAGGGGGTTGTTCTGG
AAGAGAAAATAGGGACCTCATATAAGTAGGAATAAACGGTCGACGTCCGAATGTGGTCACCTTGCTTTAGACGCTGTTGTCACTTAGCAAGTATGAGATGGACGCGAAGTGGTCAACTACCCGGTTTCCCCTGACGGGGTACGTTGCGAAGGGAACTCTGCTGTAAGCAACAGCTGTGGCGTTAAAGGCAAAGGCGAAGAGTGGTAGAACGTACCTCGTTCATCGACTTACCGAGGTTTTCTAATTCTTTGCAATCATGCTTATTGAGGTTAAAGAGTACTCACCCGTTGTACTCCGGTTTTTCCTGTAATGTTTCCAAATGGTTAATGAAACTAGCCGCTGCGCCGGCCTAGTGATCCCGAGCCTGGCAACAGGGAACAACCTTAACGGGTGACCAGTGGGGTGTCTCTGTGGATATAGTCACCCGCCTAACACGCCAGTGATGGAGTTTCTAGCTAGAGACATCTTCGGCAGCTGGTATAAGTATGTGATCTTGAGTGCGGTAATTACGAAGAGTTGCGGAGGAATCCGTGCGTTGCCTTTAGGGTTCATCGAGAGCCCTACCTCCAATTGTATCTTACGCGTCCCGCGGGTTTGTCAACCTACGCAGCCGTTCATTCTCGGTCGACGACGGATATAGGCTGGAAAATTATCGAGAGGCTGGCCCAACTGAGCTCTCGTCACATAGGACTCGGAAGATGCGTGGATGAGCTTAGACTTAGCGAAGTAGCTGCCTTGTTGCCGTCAAGGATATTGCATCACGCAAATACTACGAGACACCCGTACTCCAACCGATCGATACTTGAAGATTTGTCGAAGGTCAAGAAGATCATACACCTCCTGGGGCACTAAGCCGGGGATTAAAGAGCTGGCAAATAACTCACCATTAAGCGATGAGCGGCCCAGGTCCTCCCTAACGTTGATGGTCTGGGGGTTGGGTGAACAATTTGGTCCACGGGGACTTTTGTCGGTTTTCTCAATAGGAAAACTCACCCACGTTAACGCTAGCCACTACCAAATATGCGGGATTTACCTGCGCTGTGGGTAGCCAATAGATGCACGAGCACCGTATCGAACCATCAAGCTCAAATCGTTCGAACAAGGGAAGTGTAGCGTTGAGCTAGTCCATCTAATCATCAATATACAATAGCCGGCAATCAACTGGCCTTAGCGGTCAGACAGTCGAGGCTGGTATTCGCATGTCAATCGAGGTCAGACCACGAGGTTGCACTGTTCTGCTCAGCGGTATGGATCAACATGCGATGTGCGCCATTCGTAGTGCCGCTTGGCAAAATCAATTTCTCTCCCTGTGCTACGTATGTTCGCAACACCTTGGCCGTAGTCCGGGTTTTTTGACGGCTGGGAGTTCTCGCAATGGACATCCCCGGGTTCCCGCCACGCCTGGGGTCTACAGGGGGATCGCGGTCATGCTTCAGAATTCCGCCTACGTGTTCTAGCAATTATGAAATCAACCAATGGCCTTTTACATGTCGGTGCTCGAATGTGTGGAACGAAGTTGGCTTGGTGTATGGATCACCCCACTAATCTTTTAAGAGATCCGGCTTAAACCTCACTCCTCTTTGTTGGCGCAAGCATAGTTAAGAATTTGCTTTGTGCCTGTGCCATTACGATTGACATGGTGGGCCCCGTATAGGGTCGCCAAAAAATTGTGAAGGCGAAGAGTCAAATGCCTTGTTTGGAACTCCGGCCGGGTGAAGCGCACTCTTGGCGATTGGGATCCTGACCCGGATCCCCAGTTATGTTACTTGGTCGATTCGTTTCTGGTCTTTTCACGGCGCCCCTATCTGAGAAACTTAGTCCATCGTGCAATAGCACCTTACACGTTGTGTCGTTGCCGGTGCCGTTGTTTTATTTTGGGGGCCGAGGGTGTTCCCAGGTGGATCAGTACCGAAAGTCTGTCGATACTAAGGTTAAGGGGTAGACGGGGATTGTTTATTGACAACGGTTCGTCCTTCGGGATTGCTTGATGTGATAACGTC
TGATTCGTCCAGTATATCGTAGGTCTCACCGCTCCACGGTACTGTCTGATTGAGAAGCTCGGATTTTCCTGTTGCGCTGAATCAACTTCGAGGTAGGCGCGCTCATGCGTACAGGTAAAGAATTTTGCCTAAAACGCCTGCACGCGCTATCATATCACTCTGAAATAGGTTTTCTCGCCCTGAAACTAAAGTCCGTTGACCACCCGGAGAATAGGGATGCCGTATTATTTAGGCGGGACCTCTGGTATGAAACTCTTAGCCTCGTTCTCCACGGTAGATGAGTCGGTTCTTCATAGCCAGTGTACCGATTGCGGCTTTATTTGATTGGCATACGGCGGAGTTCTTCGTAAGCAGCTTATGAGCGCACTGTGGTATAGTTAAATGGATGTTTCCCGTCTCCATGATGTTGCTGTTCCATAGCGCTACGGAATAGATTGCGACGGTCGCCTTGGGGATTACTCATGTGTCACTTTGAGGTGAGCGAAACGCACAAGGCCCCTTGGGGCGCTAATTTTATTAGGCCAATCATCCGGGTCGCTGAGAAATTATTCGATCACGTATGGAGGTCGTAGGTGTACAAATTAACCACCGGGCTAGCATACGATCACCAGATTTTACTCAAGACTAAACTTCTTTAATGTGGGTCTAAAGTACGAAATAACCCGTGGTCTATCAGGAAGTGCTGTTTTAACGTATAGCATCTAATTTTGGACGAGCCTAATGTCGCCCGTGACTTAATTAGTCACAATACGTACGTTATGACGCGTTGACGGCTCCTAAGGTAACCCCTAGCCATTCGAATCAGAGGAGATTGTAGGCTAGACCCTGCGTTGGTTGAGCGTCCACCTAAATCTTCAGGATGTGTTTAGTTATTTCGACCGAGTCCAATGTAGCGCCTCCGACGGCTTTTTTGAAATTCACTCCGTCTTGTTAATTGCCGTAGATGGCTCGAAAAAATATGCTGAAGGGGGGCTAGCATATTCGCTAAGCCTCATCTGGTGTGCCGAGTAACGGGTCCATATAAAAACCCCGCTCCTATTCGTTAATGGGAGCTGATCCGCGTCGTTATTGTTCAGCCTCCCTACCGCAGTGGGAATGACTAGTTAGAAAGAGTGAAGGATTATTACGGCCATGTCTATTTAAGTTGATCCATGGATATAGTAAGGCTATTAACTACGAATTGCAAGTTCACGCAATGAAAAGTAGCGCTTGTATCGCAAGTCATGGGCTAAGCTTAACGGTGACTCTATGACAGCGTAAATACTGGGCGCAAGTGGTTATCGATATCTCCTGTAATTAAGACATTGGAAGCCTTTGGGCTATAACCTACTAGCCCTGTGGTTTGTGCACATAGAGTTAGACGGCGAGCGCAGCCGATGCAATGATCAATGACTCGACGGCATTTCTCATCCAAAAGTCGCCAAACCGACACTCCACTAACAATTGCTAAGGAAAGAACGGGATCATGGTTTCGCCATTATCCGCATTGCGGATTCACGCCAGTTGTTATTATTTTCCATGAGTACGGGCCGGAGGCTAATACTTGAGCGGATGTACACGGTTGCGAGTTGGGGCTGCTTACGCCACTCGTATCATGCGCGGGCGCGGATCTATCGGTGGCTACGTAGTTGCTTAGACCTATAAGGTTAACGGCGACCTCGTCAAATCGTGGAATGCTACAGGACCCGCAACTGAAAACAAGCCGACACCGGACCAATCGAATAACGGACTATCGTGTGCATTTACTCGGTGGCAAGTTCGTACACTTACAGCGAGATATAAAACGATCTGGCGTTGACTTAATCCCAATTGGCCGAGCCGCAGACCAATTCGGCCTTCGAACGTATCACTTTGAGGCCAAGGATACAAGATCACCCGTCGTTTAAGCAGAGCCACCTAATGCTTTAAGAATATGGAGCCTATTCACCTTGTTATTGCGTCTGCCCATTCCCTACTATCGTTTTGGGAACTGAGCATTCGCCACCACAAGCTTCAGTGCC
GACAGCACGAGAATGTACGTGGTGGCAAGTCTGAAATGAAAGGTACGTAGCGGTATCTACGCCCGGTCTCTGCTTGCTCTATACATTTGATCCATTGGCGGCGGCGACTGTAGCTGAGCCGAGGGATGTGCTATCGGTTCGCATTCAAAACCTGTCATAGCGCGCGACAGTAACTGGGTTCACCTCGTTATAATTGGGGGATACGAAGCCCGCCCTATAACATGCTAACAACGTAAATAGTGGAAATGTGCCGCACGTAAAGGACAATGGGAACACGGCAAAAAACGAACGGTCTTAGACACTGTTGCTGTGTAGGCTCTCCCCCTTTAATAATGTAGACTCCCTTGTCCGGGTAGCACAGCATCCAAGGTCCGTGGTCTTCTAGGCTAATTTCGTGAACCGCCGGCACTCATATACTGGTAATAGATTCGCGGTTCTGCGCCTAGGACATAAGGTCGGCGCGCAAACCCTGTCGCACTCGAGGATCACCTTAGTTGTTCCACATGAGACCGCTCCCAGTTAGAGTGTTTTGGAAGCCTGGATCATCTGGCATGTGCGCTTGGGGAACAAAGCCCACAGCAAGAACTTTAACAGCTGAGACTAGATGCAGAGGATTAATTGTCCCCTTAATCACTTACTTGAGGTGGATATCGCCGCAGATCTCTGGCCCAACCCTACGCCCAGCATCCAACTCTTGCAGGATTTATCCCTGTTCGGTGGATGACATGATCTGGTTCTATAATCCGTCCAGCGTAATGGACACGAAATGAATAGTACGGTTGAAGAAAGCCCCCAGGAGGGTGTAACACAGGTTACGCTCGTGTCGTCCTCCGTCAAAGAATGCTCCGCAGATCTAATCTACTACTAACAGCGCATTGACCGCAATTCACAGCTTCACGGGTTGAATCCGGTTATTCACGGTCCATTTATTTCATCGTGACCCCACCGGCCGCTAAGCTATAGGGCGGCGGAAAACGCTCCCAACCAGGTTGATGGGGGCCGCTATAGAGGATTATGCGATGGTGTTTGATTTATACCAACCCGTCCGGTTTTGTAATGGTTCCGATTGCACGCTCGGCGCAGTGGATAATACCTCGTAGCTCCGCCAATAGTGTTACTAAGTTATAGGGAAAGACGGCGAGCATGTGGTAGGCGGATAAGGTCCTCTAGAAGAGCTACGTAGCCGTGGTTCCCCGGGGCCGGACTCCGTGCCTTTAGCTTTTGACGTTTCGTTGCGTCCTGCATCGATTGTACCAACTCGACAGCCCAAATCATCAGCGAAACGGCGAGTCTATACCTATTACATACGCGGTAAGCGGACCCTCAACGAGCCAGCACGATGAAACATCAGTCTGTTCGAAGCGTCTCGTTTCTCCAGGTGAAGGTCGCACATATGAAAGATACAACATTGCTTCATTTTGCAATAGATGTGATGTCCACTGTAACCACCTATTGCCGATGCCTCATACGTTCGGTTACACCGAGATTACTGCGACTATCGGGTCGGCGAATATGCGATTCTCCAGTGGATCTTTCTGATACGCCCTTGAACGCGCATCTCCCTTGAACGAGGTAAAACATGGTTCAAATTGATTTGCTAATGAACTTTCTGAGTAAAGCATAAAATTGCCGTACCCGCAGCTCCCCCTATGAATGCTCTACGAATGGGGTAAAGCTCTATACTCCTCTGCGCCCGGCGATCCTAGACTTTGCAAGGAGCTGGCTAAGATGATAGCGAGGGTCCTTAAAGTAGCAGCTACCCATCGGTAGCGACAAACTCCGTCCCGTCGATCAGACCCGCGTCGCAACACGAATATAATATTCCACTTGCCCGTCCGCATTACGAGTCCGACCACCCGGAGAGAGAAGTGGGGTCAGCGAGGGGAACGTGTGTACACTTCCTCGATACTTCCTCTACAGTGAAGCATATCTCGCTCTAAGGCCAAATGCAACAAGCTCACCCACAAGGGTCTTGACTTCGCTGGGGCTTGCTGAGGGCC
CTTACACTTTAGTACGAAACATGCGTCTGCGACAAATTGATGAAGGACCCTATGGTCCAGTTCACCCTTCAAAATCTACGGTCAGTGTCCGGACCTTCCGCCTGCTTCGCATAACATCCATAGTAACGATAGCTACCAAGTTAGTTTCCGGTCGTATGAGGGGATACGCGTGAAGTTAGAAACAAATGTGCGGTTCCCGCTATCAATCCCAAATACTGTAACCCTGAGTACCGTAGGCTGAGCGTCTACTCTAGTATCATCGGGCGTGCACTGTGATAAACGTTGGTGGAGCAACCTTTATCTGAACATATAAACTTCGCCACCGGCCTTACCTACCATCGTTCGTACATAGAACCCATTAGAATAGATGTTCTTTGAGGGAGTTACCATGTCACAGTTACACATAAGGACACTCCGGAGTTTTACAACTCAGGCTCGGTACCGACGGGAACCTGTGGAACTGCGTCTAACTCTGGAGCAGATCCGTAGTCCCGCGGACAGCGCACTGATCTTGACGCTAAGTGAAATTGGTAGCACGTCGTATTCCCGGCGTTTTCCAATATTGCAATCATCGTTTGCTCGGTGGAATCAGCGACGGAGGCGTATTTCCAGATATATTCAGCTCAAGGGAGTGGGTGGCTGCGATACGCATGCCGGATTTACACATCTGAGCATTAGGGAATAAGGTAGGACGATCAGCACTCTTGAGGTTCCGAACGTATTGGGATTCCGAACGAAAACACGAAGTTAACGCGGAATCAGTCATAGACTTCCCGAGTTGCCAGCTGTGGACGCACTGCCCACGTTAAGACATAGTCGCGAGAAGCATAGCAGTACAGGTGCGACCGACTTTTGTACTCTTCTCTGGAGACGGTGCAAAGACGGCAACCGACTGGTCAGCCCTTACGTTATCGCCTCAACATGTATAGGGTGAGCGAGATCCTTCATTGGTTGTCACGGTACATCTCCCAGAGATTGCAGTTCAGACTAACACGAAAGTGCATTGTACTCAAATCTTATCTCAAGAGCCTTCGGGTTTATTCATTGCGTAGACGTGGCCCGCTGTGCTCTGTAGTTCAATCCGTGTAGAATTACGAGAAACTCCATGTTCCGTGACCGTTTCCCTAACCCGGCTATTCTTCGCGGCGCCGTTCATCCTTGTTGTCCCGCAGCTCGTTTACTTGAGTATTTCCTGCGAGCCCAAAGAAGTCCATGAGAAATTCATAAAAAAACTGAGACTACTGGCGGATCAAGCACCCATAGCTGGCGATAGCGTTACGGCGCTATACGTTTACGGGGATATTGAATGTATCTTCGTAGTCAATCCAAAACACCGACGATCACGGTCCTAATTATGACATCTTTGTTCTCCTAGCAGTGATTTCTATGAGCTTTTAGGAAGAGGAATCGATTGAAAGCAAGCAAGTGGCTATCCACCAAAAACCGGAATGGCGCAACTATTTAGCACTCACGGCTGTGAAACAGTCGACACACACTCTTTAGTCAGCCATGAGTTTCTTCTCTTGCAGAGATCTCAGCCGAAATTGCGCTCGCCAAAATATACAGACTGACGACCAAAAGAGTCTTGGTGTTGTTAGCCAGTTAGTTAGAGTCTTATAGACAAGGTTGCTGGTAACCCTTCTAACGTCCGGCGCTTGGAGGACTATCAATCACGTCGTATCCTATGCGTTTTCACGTACGACGGCCTATTAAAATCCTGTTTAGGTTTGTAGGTGACTGGCGACCTCTGATTCCCAGCTCTATAAAGGTTGGGTCAACGTTCGCAGTCATCTAGTCTGGAGTAAAGCATCCATTTGCTATGAAAAGCCTCCTATAGTGTACCCCACTTGCCTCGGTTAGACCTCTCCAACAATTTCCTACGCTCGCACAAGAGAGAAGCATCGTCATTGAGAGTGCGCGCGTAGGGCGTCCGACGCTCGGCTGTATAGAACGACTCCGTTGCCGCTAACTAGCGCTTACTGGGCAATACATACCATTGC
CCATTTGCCGCGTTCGCACTCCTCAATGCTTTAGTTACCTGAGTCATCATACAAAGCTTCCTAAAAAACCGTCTTGAAGCCAAAGCTAAGGATATACTCCTCCATTTAGTTACTTTTAAATCGGCTACGTGTTCCAAGTATATGTCAAACGTCGGATATGCGTCACGCATTACAACCCAGGCTAGCCGAGTATCCTGATGAGCCTGCCTGAGTGCACGGCCCTTGTCACGGTATGCCCTATCCTGAAGATATGACAGCATACCGGTGACTATATTGGTCGCGCGGTGGCAAAAGGATGCTCCAGCAGTCGGTCCATGATTTTATCGACCGCTAGGTGGGTGGCCTGGAGTCAACATCCAATAGAAATTCGGCCTCGCATGCCACTGGTCTCAATTCTTCCATCCCCACTTAACTCCGTTAAACATTGTCTGTAGGCGCCAGGCCCAGCTCGAGAGTCGGTGTCATTAACGAAGGAAGGTTCTTGCTCTAATCGGCTCCCAGGGACCAAAGTCCTGGATG +2 GCATTCCGTAATGGGTGACGGCGTAGCTAGCCTACTACAAGGACCGTTTCTAGGGGCCGGGAAAAGGGTGACGTCCCTCGCTCTAGACTGTGTGTAAAGGGTCATGTCATTAGCCTAATCTATGTATGCACCAAAACGAAGCGTGGGGTGTTTGCGGGTCTCAGTGGGTCCACAAGACCCCTTTTGGCAGAGTCAAGTAGGGTGTGCAGTGCACTCACTGTTATCTAGGGAGCGGGAACCCCTTATGATGTCGTGCTTTAAGTTCACGATCGAAACTGCTGCAGTCTCATCACAAACCCTCCCCTGTGCTGAACAGGCAGACTTGTGTTCCATCACCCAACGCTGCGAACCCACGCAGTTGGTATGATAGCTCCATCAACAATTAAAATTGGTGACGTCGTTTCCGGCCGAAATTAGTGCATGTAGCCGTGGATCGCAAAACGCCGAGCTACCACGCCGCGACCCGCGAAACGATGCTTCAGATGAGAGGGTTGTTCGCCCGCTCTCCGACCGAGCAGAGCGTACAGGCTTTATTTGACGACTAAGTGTGATGTTTGCGGTGTTGTCCCACCAAGTATACCCTACGTGAGAGGACCCAAAGGCCAGAATTACTGAGTGATTCACAGTATAGGAGCGCTTGTCTCAGTCGAAGGTGGACATAATGGTACTCGGTACCTACCGCCTAGAGTGAACAAACCCGGGGTCCGATTGTAACGGGCCGATTATAGATTGCAAGGGATCTATCCTAGCCAGTCGTTTAGCCATGAGGGGACAACGCCTGGACATACATACCGCCCGTGTACACGGATCAGTACGCCGATAGACTTTCTAATCCGGAACTAGCTAGGGTAAGGTGGTGCTATTGACGATTCCGTCCTTTGGTGTTCAGAGCTCATTTAACTGGACAGGGCTATAATTGATTCAGTGTGCGTCTAGAGTTCGGACCCCCCCATAGCCGGGGCAATCCTTCTATTCGCTAATAAACACTTCATGTCAGCTTACCGCGAATAGACTGGACTCACCGGCCGTTTGTACGGTTCGTCGCTTACAGGCCTGCTTCGTGCTCGTGGCACTGTAACAAGATCCTAGAAACTTCAAAGGTCTCACGAAAGACACACTGATCTCTGCCTCTGTGCTGGCGCGTCACCAGAACTAACAATAATGACCACGTTATATATTGAATAGACACGACTCTTACCAGCCATTGACTCGTGCAAGAACCTGCGTGCTGAGTTTGCAAATGAAGATCTCAAAATGCGAGTTCCTAAGGTGTGTATGACGAAAAGAACTCCCTTAATGCAATTGATCCAAGGAGTGTATGTCGCCGTAGTTGTCGAATGTGTCGCCCTGCTAGCGCAGAACTAACCTTGGACCGCTCTGCTCGACGCACGTTTTCGATACAGGGCGGATCGTAGCTGACGCTCTTATCTTGCTCACGTGAGATCATTGGGTCACTGAGAACGATCCCACGGACACT
CTAAGAGGAAAGCAAGGCTAAGGGGCTGTATTGAAGGGCAGGCTCAGTGAATGGGCGGCTGGAGACCGTTCACTATCTCCATGAGAAAAATTCCCGGGATCCTGGCCCTAGTCGGTGAATTATTTTCGTTACCGGGCAGGTTTCGTGCAGCCGTGCAGCAAAAATTGTCTTCGGCGATCAGCTATACGGGTTAGCCGTCCGTGCATTGACACATTGAACTTCGAGCTCACTTATATCATGCACTGAAAGCCTCTTCGAGCAAAAAGCTTGGCAAGCACTTCATACATTCGCACATCTCCACCTACTCGGTTCTCAAACCAGTACCCACCTTGCCACAGACGTACTTGAATGGAGCCCCCATAGAAAGGAAGGTTGGAGGGTCAAGTAAATGTCATGTTGGTCAAAGGACGGGGCAAATGATTGTGGCGGTCTCGCAGCGAGCCTTGCCTAATTGGGCACGCGTACTTGAACAGCTGATTGCGCGCCCAGGCGGCGGGAACATCATTACGCCTTTGGCCTGATCTTCTATCGGGCTACTACGAGACTGGGAAGTCATATCACAGCGATTGAGATCTAAGAGGCAAAGTTTTGGTGGTTAATCAGAGAGGGAAGAACAGATAATATGTTCCCGTAGTCTACAGGGTGTCAGTAAGACTATAGGTATGTACACGGTATGGGTACGGACTTCCCGCTCAAGATCAGGATCCTTTCCTAGGGGTGCAATGCCCGGCTAGGTCGGTACAAGGCCAATCAAGGGTCTGATGAAATTGCGTCTACACCGGTGTGTAGTTACGAAATCTCTAGGATCAGAGCCGAACGAGCTACAAACTAGGTTTTTGTTATCACAAACACTACCCCAAGCTTTCAGCAGGTTTTGAAGGTGTCTGTAACCGTATGGAGGCGCGGGGAGGCAATCTTCCCTCTCTTTGGCAAACGCAATACCAGGGAAAGCCAGCATAACGTAACAAGTTTTCCCACCAGCAGACTTGCCTACCAACTCGAAGGTCAGTCTTAGGTTACGCGCCTAGCATGGTTGTTATGTATCGTAGCGGCTGATTCTAATAGACACGATTAGTGTTGCCCGGTTTCGGCATAACGCCGCACGGATCGAGGAACTTCCGGATCCTCCGCGGTCGCTATGCTCACGCCGACGTCCGGTCGAAAAAAGTAGGGTTTGCGGATATCTTCGTCTGCGTACTATCACCTTGGCTATTGTATGCGTACAGCGGACGTCAAGCCGTTACCCCACGATGTTACTAGCCCATTGGGCGTCGTCACTATTTCATATCCTTAAGTTACTAACCGATAGCGTGAATCCGTCTAGCGCGAGTTCCGCACACTATAAGCCTGACTTGACGGCTGTGCCAAGCACGACCGGGATAAGGCGAGCTAGAATCAGGGGCCACGGTTAGCCAGCTTGTACCGAGGACATGCGGGTCGACCTTCCAGCTAGTCGTGTGCCAACGCCGCTGACTCGAAGACCGCGACCCAGGGGCTAATTGTACCAAATCGTAGGCTATCATACTGTATCTCCCTTCCTCCCAGCGAGCCGGCCCGAGTATTTACCGACGCCGTAATGAATCGCCAGGCGCGACCTTGAATAGGCCTGCATTACTACGCAGATGCTCACTGAAACTCTCACATATACTCGTCTCCTAGCTGAGCTATTAGGGCAGAGCGGAGTGTCCCCCTACAAAGTGGCCCCTTCTGAGGACAAGCCTCGGAATGCCCTTGATGCACGAATCCCGTTAATTCACCGAGGAGTTAGGTGATCCTTTTACAGGTGCTTACTGTGCGGTTGGGGTGTGCAACCTGAAAAGAGCGTCGCACATTAACGCGATAAGTACCCACGCTGAAGGTACTGATAGTTGACTTTCTGTAGAATCTAGATTTAGACGATCGCGTCAACGTCCACCGGTTTGTATGGAGTGTCAGCTACGTTGATCAGTCTAGACATCTTGCATGATCCTCACGAAGGTTGCGTCCCTTATATCTGAGCC
GAACCTGTAAACATCGGATATACGCCGGACTGACTCATGAGCGTTAAGGTGGCACTTGGTCCCCAGTTGGTAATGTATAGCGGCCTGGCTTGCATCAGATCACAGCAAGGTTGCTTAGCCTCAGGGGTGCTGTACTACCTACCTCAGGTCAAAACAGCGCTTAATACGTCGGTGAGTCGGTAATAGCTTAGACTTAAGGTGCAGTCGCCAAACGCAAACGCTGTAGAGGCGTCGTGGGACTAGGTCTCTAGACTCGTGATCTTGGATTGCACCCTTCGCGATGGGCTCGCAGCGAGCGCGTATCGACTGCGAGCTGTATGGACGCCTATGATCTTGATCTGCATCGCGCGCTTTAGAGGGAATTCCGCACCTTATTGTTCTCGCCAGCGACCCTCTTCTATGGTAAGAAGGGTAGACTTTGTATGTTAATGAGCAGAAGATAGTCGGACAAGCGATATTACCAAGGAAGACGTACTCGTAGCCGAAAGGCTCGCGCGGGATCTCCCTAACATTCATAAGACAGAGTACAGGATTTCAATCCGTCTGGACATACGACCAAAGCCGATATGAACTGGCCAGAGAGAATCATTCCCCTCGTACTCCCTGCGGGCTTATCAATACCGATCTTAACAGCGCCATAACCAGGCAGGTAAGTATGGGTACAGTTCGATCCGCTTCCGGGGAAACCCCGGAGCAGGATCTGTACCCATCAAAGTCCTCGTTAGGCAGACGTGAAGGCATGGACGCTCGCTGTCAAGCATGGCGATGCGTCTGCTTTATCAGTTTGAGAGCCCCGCCCTTATAGACATATATTGCGTCGATTCGTCCTCGGCGTGAACCGACACTTGAGTCGTGGCTCATGCTGTCCGAGCCGAACTCGAATCCGCAACGAATCACAGAATTTCTTGAACGGGCACTACCCCCAAGCCAACTTGTGGGTAACAACAGGGTAGCGGTTGAGATAGTGGTGCCGGACAATGCATTTAGACGGAGGATCTGCCTTCTCGCGTTGCTATGTACGAACACGTGCGACGGAACAGATTGTACTGGCAGGTGTATTCGGTTGTGTGCAAAGCGTCGAAGTGGTCTTAGCTATGACACTTGCAACTAGCCGATACACAACAAGCCGGAGTGGTCAGACTCAAGTAATGAGCGTTGTTGCCCCGTTGCTTAGTTAAGCCCGGATTCCCGATGATTCACTCCTTACCTGCGCTCTGCACAGACCCGCGAGATTCCAGCTAGATCTCTAGGGTCGCAAATTCATCTCACATGATAAAGTATTCTATCAGCCTTGAGAGGTGCAAGTTTTAGTCTAGCGAGCGGAGCGGGCTCTGGCAATCTTTGAAGTTTCGCGTTACCATACAACGCGGTCGGGAGACAGGAGGTAGGGATGCGCCCTACTCGTCCCTCTGGTCATTCATTAATAACATAGACGTGTTTCTAGACAGGTCAAAGTAATTGACAACTTTCAAAGATAATCCACTGACACTTAATTCGTAGGCTTCACTGAGTTGGTATATCAACAGACAAGGGTATAGACCGTTAAGCCGCGGCCCGTTAAAGCCAATGATATCCCTCTCGTATCGTACTAGACCGCTAAGCTGACCCAGAATAGACATGTCTGTAAACCCTCTGGTAGGGGTAGCACTCGCATTTCCGTAATGGGCCTCTTGCGGTCCACTGAGGACCGCAGTCCTCGGTGTTGTTAATGCAGGTACTTTCAAGTCTAAAGTACGAGACAACACGTTGTGATAATATACCCTACTTTAACGTCTGGAACAAATGCTTAGGCGGTGACCTCGGACCCGGTCCTCTAGTGACTGCAGCGATTATTTCAAGCACCGTATTCTAACACCGAGGAGCAAGCTGAACATCGTAATAGGCCCAAGGGCGGGGCCCCGAAAGCTTTTAGTAGGATCTTGGTGGGGGCCGATCCTCACACCCTATAGCCTCCGGTAGCTAGCATCTGGATTTCGAAGATACCAATCTTCCTGAAAACT
CAACGAAAACTGTAACGGTGGACCATGGCATCCGTACACGAAGGGCCCGTGCTTCCCGTGATGCTTTTGATGGCCAGTTACAGCTCTTTCCGGGCGAATTGTTACATGGCCACTACATCAACCCGCACTCAACGTCCCCATCACTCCCCGACCGGTTAAGCAGGTCTCCTAACTTTAAATGGACGGGCCCACATGCTTGAAAGGCGCTGTATGTTTGGCGTATCGTAGAGCAGAACTCTATGGAAAATAACGTGCCTTCACCTGAGGAGATCCAAAGGTACTACAACATAACCGAAAAGAATGGCAGGCAATTTCTCCTTCTCACGCATTATTAAGCCCAGGCTTGCTTACCATGGCGGTAGGGTCGTTTTTTGAATAAAGCGACCTACTGCCAAGTCTCTATGTCCACGCTGCTTATCGTGTCACTCCTAGTCGGGCACCTCACCCCTGTGATCCTAGTTAGCCAAATGCCAGGCCATTCTCTTGTCTTTGACTAGTGTGTGATATCACGTTATGAGTAAGGGATTATCGTAACTAATACCGTAACTATACAGTCAACAATGGGCTAGGCCATCCTGCCACCCCTTCATTCAAATTACTCCTTCTCGGCAAGCCTATTAGTGACACCAAGTTTTGCCTGCTCTTATCGGGACCGTCACTTCAGCAGTTGCTACGTGGGCGCTCCTCGATCAAGTTCAATTAAATGTTGGATTGGGTCCCTGCTGTTGGAATTTATTGGCACTCATTCGCAACCGTACTGATGGACTCGCCTCTTATACTAAATCCGGGCTGGGAATGCTCGGCACTCACTAGAGGTATTATACGGGACCGGGGCCCCACATTGTAGGGTATTCGCTTCGAGTTTTTCTCGTCGAGAACTAGACACTCCTAGCGATGTGACTTTAAATTGGACGGACAGAGACGTGACAGAAAAGGGTTCCTGAGACACTACGCAATAGAACCGTGATGTTTTCAACAGCTATCGTCGTTCCTGGGTCAGGCTCCCCGCGTCACCGGCTGATACCGTTACCTATCCTTCATTAGGCCAGGCATCGGATTTGGTGCAGAGCTGGGATCACTGCACCTCAAGTTTGAGTATACATTGTTTAGGAGTGCTTCCCGGTAATTACGTCAGAGGTAGGGCGAGAGGTAGCCAAATCTCCTATCTCTGGTTGAGGTTCTAGATAATGCCAGGTATACAAGAAGGAAAATGCCGCTTGCTCATGATATCGCTAAGGCGTAATGGTTCCCACTGTGACTATTTAATATCTGGGCTTGTGACACGATGACATATATGCCATGGGGAAATTAGGTATGAACCGCCAGCATAAGACATCACCGTTGCACCACGGCTTGAGAAATTCCTTACACAGATCCAGAGTGTAAAGGTGGCTGGAAAGCGGTCAAGAATGCGACACGACCTGGCTTGTAGTTTACATAGGTGTTCCACTGGTATGTTAGTTGCGTGACGGGTAAATAATCCGGTTGGATCGCAGATACTGCGTGGTGACTTTCCTGTTCGCCTGGTGATCTAGTGTGATTCATTCAAAAGCCTCGCTGGCGGCCTTGTCCTCGGTTCTGCTTTCTATCCGCACGTCTGTAATATTCGATTGCGCACTATGGAGCCCGCTCAGGAAAACAACTGGGCGTTCCCTGCGCCAGCGGCGATCGAAACCAGTCGACGTTCCCTTACCCGACCGGGCCCTACGTTATAGGCCGTAGATTCCACCGAAAGACCTGATATCCTGACCCCATTGGGACTATTGCGCCGGCCACATAGCTTGGGTAAGAGCTGTGCGACGTCTGATTAGATTCGAGTCCAAACGGGCACCCGGGAAGGCAAGCGTCGAAGCGGCCGTATTCCGTCGAATGAAACATTAGCACCGGGGGACGGATATTCTTGAGTTGCCGATGTAATCCAATGAAGGCCATTGCCAAATGTTAGGAGTATGCGAGGGCCTGGTTAAGAGGCCTTTTCCACCCGTGCCGGCTGTAAATAC
ACGCAATTCAGGAGCGCCCAGTGCTCAGTACCCGCCGCAAACTATCGCAATCACCGTTGGCAGCCTACCGGTTGTCATGAAGACGAGGAGTTCCCTCGTTTTTTAGCGTACGCCGCATGAGCTCCATCAAGCCCAATGTGTTGGCCAAGTTTCCGCGGAACAGCGGTGGCGGTTGCAAAGGAGCGATGGTCCAACATAACATATGACTAATACTAGGGCGCAGTAGTTGCGAGTTTGGTTCACCGTGGTCATGGTATACGTAGCTACGTTGTGTGTCGGTACCGGGTATTCTTAGAGTGGTTCTGCCCTGGGGAAATTCTATATGTCAACGCCCATTATGCTCATGTTACGGAGTATATCCTATCTTGCGCGGGATAACTAACGATGAGGGGCGGTCTATTATTGGCGAACCTTAACACGCTTTCAGCGAACCCGGGCCCGATAAAGATTCAACCAGCTCTGAAGACGTTGACAAATGGCTTATGATAGTTGAATCCTAGGAATAGAAATGGCTTTAAAGCTTGTCATATTCTCCACGATCACCATCCGTCATCAAAAGATCCGCGACATCCCACCAGTAGTGCATCATAACTCCATTGGCGCCGTGCTCTCGCGTGAGCTCCGAACTCCGTTACTTTTTCGTTGAGATTATTAGGATTAGCCGGCGCAAGAACCTGATTGTCATGGACAAGAATCTCCATAGTCTTAGCCACGGCACAAGGGTCACTTCTGAGTGAAGGACGACCTAGAAAAACAGAACCGTCCAGGGGAGCAAACTTTTCTTGGCTCATACACAGCTGGCTCCCCCGGCAAAACTAGATTATCCCCGTAGGCGCCGGAGACCTGCCTACCAAATTAAGAGGAACATGGCATGTTCTCGCTTTCGCCGCCATATTTCCACAGTTTGGACTTTCAAACATGGCAGATTGGTGGTTGGACCCCACAGAGATGCGGGATGTATCTTTAATGGACGCTGCGGGTAGATCCCGTTGCTAAATGGAACCCGAGGGCTAATAACCTTTGTGATTCTATGGCTGAACGTGGGCGTCCGGCCGCAATAGGCGGGCTGAGTAATCTGCCCTATCCTGGGTTAAGCATTTTAAGGCGGCCGCAAGAGCATATATTACACTTATTTGCCACCGAATGCTAGCGGAAGCGCTTAATTTCGTGATTCTATTAATCTCTTTCGCATGCGAATGGTCATATTCCTGACTTCACGTGGCTTGGAAGAACCGGTAGATCCGCGCCGGACGCGGTCGGGTGCGCGTAGCTAGGTCCACCACTCGAATTCCGGATCTGTTTTTTCCCAGGATTACGCCGAGGGGGCTAGCTAACTCCGAAGTCGCAGAGTGTATTACAATAATCAAGGCCCTGGGTTCAGCTTGTCGGCCGACATCAACAGGCGTCGATCGATCAGACAGCGATATCATCCGTACCGCATATTAACTCCTCTTTGAGGCGCATAAGTGGTCAATCCTTGCCGGGAGAAATCTTGACGTCCGTGAACAGTTATCTATCAGACTCTGTATCTGATCCAGGATGCTTGATGTACAGAAGCACTCCCTGCTCAAGTAAAGCTAGGCCGCGACCTGCGGTTCGTAATCGAGGCAAGAATCTCGTTGGACCGTTCTTTCCTCCACCTCTCATCTATGCCTCAAATAACGGCTAGGGGGAGACCCTGACTACGCCTGCATGAATCATGTTCCAAGATTATTTTACTGTCCTCCGATGCAAGCAAGGCGAACGGTAGACTGTTGGGGTTGTGCGGCCGTTTGCACGCTTGGCGTCGGTACCTCGCGTGAAAGAGTGTAGACACTGTGATTTTTCTTAAGAAGGGACGCTGGAGCGGACCCGTCAGAAGATTGGGAAGCTCACACTAATTGCGGCTCAGCGGATTCCTTTGTGCAGCTTACCCATCCAACTAGCAGTCGCGAACGGAACACTAATGGGTAGTAGAAATGCGGTGCATCGCGGCGAGAATAGAACGAAGGTAATACTCC
CATCACCGGTCTTTAAGTCTCCCAAAGCTGGCCAATTCTTAATCAGGAGTGATTGACCTTCGAGCCTTAGGTCTTAAAACGTCCAACATGTCCCGTTTGGTCCGGTGAGTGATTCCATCCCTCCCCCTGCTCGGGCTTTACTGCCCAGATTAACCTGCGAGTGCCAGCGAGTCTCCGAATTTTCTTAGCAATGGAGCTTTAGCAGTCGGGATGAACAGTTTCTATCAAACTTGGGGGACTCCCGGCCCCAACGCAGAGCGACCGCGCACAACTATGAAAGATCGAAGGTGTAGTGTTGCAGTCCATTTCGGCCATGAGACACTACGTAGACTATCAGTAGTAAGTGACTGTGCGGGCATAATAAGTCACTGACGGACCAAATCTTCTCTCATTATCGTTCTTTGTTATAACGGCTGTCACGTTCGTTGCTTTAGCCTCTCGGATCCTTTGACAATGCGAGAGTCACTCTAATTGCTAACTCAGGTCCACACGGGAAGCTGGGTGAGTCGACGCCAGTGAACCT +3 TTGACGTTTCGCATATCTCAGTTTAAGTGACCTCCTAGGGTCAGAGTTTCTCTGGGCGATGCAAATTTATTAGTAACAGACGTAATACAGCGGGTTTGAGCTGCTCGCCGCATTGTACACTGTGTTTGTCACACAAGCAGGGGGGGAACCTATCGTAGACCGCACGTAGATAAAAACCCAAATTGGAGAGATTCTTATCTGAGTCCCTGCTGACCTTTTTGTATATTTACAGAAGTAACTTCTTATTTTGATGAAGCGCATGTTAAGAACGTATAAAGCGGCATCCTGAGGACGTATGCTGATGACTGCACTATAGACAGTCCTAGGTATCACCTGCCCACGGAACAGTGTCAAACAAATCGGATGCTCTTTGTTTCACAAGTGTCTGTTGATCACGCGAATGGCACTTAGAACGAGAGCGGGGAGTGGCGTATCGCACTTAACCAGTCCACCCCGCCGAGCGGCCCTAAATAAGGGGGTTAGCGTAATATGTATCCCTAAGTAGTCTTCATTTGACGGCTATAGAATACTGGTTTGATTACCTGATTGTACCTTGGGGGAGCGGCGTGACTGACTGAATCTATCGGCGTCGTGATGACCGGCCACGATTGCATCTTGGAAGTGGTTAACGGTTCACTTATTGCGATGCAAGTCGCCCTCATAGGACTAGGGTGCCTTCTGCCCAAAAGCGGCAAAACTGGCGTCATTAAATCACGGACCTATCCTAGAGGAGAAACACGACATCCACGCTAGATCGGGAGCCCGCTGCTGCCCCCGCACAGTCGAATATGCCCGTCGTAATAAGTTATCACTCCCTGAACAGGCACTCCCCGTCGGGATCCTAAATGCCGGCGCATTATTGTGCCTAATTCGAGTCTTGCGCGTCTAAGGCGCATTTCCCTTCTGATTGATCGAATCCTTTCCGCCCGGTACCATCCTTGTATAGGCTCCAACGCCGAGGCAACCCTTCTGAACGATACTAAAGAATCAAGATAGGCTCTACGACTTTATTCGTGCACCACCGACCGAATGGTGAAACCGTCAGAATGTACGCTGGAACGAAAAGTTCACGACGTCACTACCGAAGTTTACCCTTACTAACGAACCAGTGGGGAATTAGCTTACCGTCCAGCGGACGTCGTGAAGCGGGCACTCAAAAATACCACCCCGAAGTGACTAACATGGACAGCATGATCAGAATGGACGAACAAGAGCGATTCACTGAGCGCTGTGCCCGGTAGGACGAGTCTTTAGTCGTGCGTTCTGAAATAAGGTGTAATTAGACGTAGCACAACTGGTTATTTAACCCCGTACGCCAACGACGACTAACCTGACCGGCGGGTCCGTCTGCCTTCTATTTTGCTCATGTACATCGTCAAGTTAGATACAATGTACTAAGTCATCTTAAAACTTAGAAGCCGAACTCATTGTGCTGACCTGAAGACGTTTGGACTTCACTTAACAGCACCTGGG
CCCGGAGAGGTCAATGCGTTCTTAGAGAACTACACTTTCCAGCGGTATCCCTGTTACCCTAGCTGGATACTGAAATCTATGTACTTCTAAGAGAATTAGAGTATGATGAACGGGCCTGGTGTATCAGGTGATGACACGTGACAGCAAAGTGTAAGCGTTTATGTTTGATTCAATCCAGCGATAGACAAGAAGGAATCCGAACCTTTGCGTGAACCCACTTTCAAATGACAGATCATTACTTAAGAGATGAGGTGTTGGACGGATAATTGGAGTTGGGAACACATCCTACTTAGCATCAGCTCCTAAAAGATGCACGGTATAAGTTACCAGTTGGTGCCGCCGATCTTCTGAAATGGAGCGCCCTTATGAAGTAAGGATACTTTGTAGACTAAGTGCCTTGTCGGTTACTGACCGCGACAGATTCTTCTTGGTGTCGACAGCGAAAGCTGACCTTAACTGTCGACTGTAACGAGATGGGAGATTCCAAGCCCCGAGTGCGGGAAGCACTCACTGTCGCTAACCCGATCAATAATTTGACGAGTACTAAAATGGGAGGGTGCCGCCGCGTTACCTATACTTCCGAGACACAGCCTCCGAACTTAATAGCTCGGGTAGGTATTCATTGGTTGCATCCGTAGTAGTCTGGCCCTCGTGACGCCCAGAATTCTCACGTGGAGGTAACTTGGAAACGGTGACTCCGAAGCATAGTAATGCCTGAGGGATTCTAGAAAGTGGGGCGCGCGTGGATGGCTAATAAGATAACTACGAGAGTACCTTCCCAGGCGCCTGATGCCTACGGAAGTGATGATCTTGACTCACCCGTGTATAGTACCGCCATTCGCCACCTATCAGACCCCTCGCCTCGTTTGCATACAAGTACAAAGCCCAATGGGGCCGGTACATGGTCCCTGCTGGAGACACGGCCCAGGACCGCTATCTTGAATAAGTTAGAATCGCAAGATTAGGAACGAAGGCGTTACTCTATAAGAGATCATTCCTCGGAGAGAGTTAAAGTTACCGATCCGTCCAGACTTCGGTCACTTAGTACAGATCCTGGCGCATCGTGCAGACAGGCTGGCTGTTACATCACGCTGACAGCGTTCCGCAGAGATGGGTGGCCCGCTAACTGCTAAGAAGCGGCCAAGGGCACCCCGCTGCCCTATGGACAACGGTAACGAATTGAGGGCTATTTGTCTGCGTTGACACTGGTCGCCCCTTCGGTCCAAGAAGAGCAATTTATCATGCTATCACATATGGACCTGAGGACATTGCGCCTTTGAGCATTGTAACTCACGCAATCAACAAATCCTTTAGCGGTATCGACCTACTGTGCCTTCTCCAGCTTCTGAGCCCTGGATAAATAATCGACGTATGCGGCAACCAATCGGGGTGTCTATGGTCAAAGCTCATGCATAAGCACGAAGATAAGTGGCAATAAGGAGTCAGCCAACACGTTCGTCAATTTGCGCGGATTGACCTCCAATACCGCGACTCGTGGAACTAGTGACCTAGACTTCGAAATCTTACTCTCACGCGGCCCTCCTGAGACCTTCTCGATACGTATCAGGAAATATGAGTTCTCGACCATTTACAGTTGTAGGTCTATTCTCGTACTCTGCCCCGCTGGTTCTAGTCGGAATAGAGAGACGTGCGCGAGCCCTACCGCGCCTCAGCGTCCACTCGGAAGCTTCCAGGAACAAGCCAGCCTTTCTTCATGAGAGGAATCCGATAGGTACATTCCCAGGTACGCTTTTCTATCAAAGAACGCCTTGGACATACTGTGCGCGATCCATCTCACACATTGAGCTCGTGGGGGGCAACTCCCTGCTAGTATACACTCCCGACACTCGTGCATGTGTCACGTAAGCGGGTTTGGGACTGTGATGGACCCTCGTAGCTTCAGACCGAGAGCTTCGGGCAGGCTATTGTACGCTCTCGTCCTGATAGCTCTCAATGGCTCAAGTCCCGACTCCCCGCCGCGGGTCGGGTGCGATTCCGTG
GAAACAACGGCTAAGGCGTCCGGTGATGACCGACCAGAACAGGCCCGCTGATATCCGAATTTACTCCCGATTTGTTCCGTCTGATAGTCAGGGCGGCATGAGATTCGGTCGGTATTCAGGAAAACCAAGCCTACGATATTCGGTTCCATAGATCTCAACGCCATTCTGTACTTGAGGCATGTCGACGAACGTTATACGGTTAGTGGACTCTGCCGCCAAAGTTACAAGAGCCTGCTAAGTAGGATATCGCCTTAAACCTTCGATACATCCAAGCCTGGTAGTCGAAGGAGTACTAGTGTTCTTGGTTAATTTGCGATTCCGAAGGACCGTTAGAATTATCGTCGTTATTTCGTCGGACAGTTGGACTTTCACACTTCTACAGCACTGCTCGCCCCCCATTTGTACGCGCCGAATTTTAGTAGTGTAGGCCCGTCTGGCTTATTTCGTTTGGTAACTCGACAAGGACAGTGACCACTAACTCTATAGTGCGCTTATCCCCGCGACGATCGGAACTAAATAGGTTCGCGACCATTTATTTGCGGTGTCACAGGAGCAAGGTCAGATGAATGTATGATATGACTAGGCGGTTGGACACGTCGGAGTCTTGCAACGAGCGGCTCGACCGCGACCTTATCTGACCCCTAATTATGTAGTAGTGCCCGATGAAATAGCGATATGCTTGCCGCGATTCACTTCGGGAGAGGCTACGTCCTTCAAACCACGCGTCAGGCGCACGTACGAAACCCCTAGCTCCTTCCCCTTAATAAGGTAGGGCCTATTTGAGTGGACTGTGACCAGGGCCCAAACAAGCATGGATGGCTCCTATTTGCTATCGTGTAGAACTACCTGACTAAAAGTCCCGATGATTGTACCGATTTTGGAGTAGTCGGCAAGAGACTCCTAGCTCCAAAGGTTTCACTCGCGCGCGAGTCACACTTACGCTTGAATGCGTGGAAGTTGTTAGAACCCTCGTCCGGGATCAAGCGCCATATGAGGCCGTTGTCGAACTAGGTTGGCTTTAGATGAAACCCGGTAACCTGGTGGATGGTGCCAGTACTACAGCGCTGACAAGGTTAATGCATCAATGCCCTGTTCTTTGAGACCTTTCCCACAGGGCTCTATCCTCTTAGACCGGGCAATATCACGCCCAAGGGGTTGGCTGGCGACACGTTAGTAAGGTATCTTCGGTTACTTGTAGTTCAAATTGATCCGTCCGTCTGTACCAAAACAACGAAATGCTCAGTACTTACACATTGCCGCGTGTTAATCTACCCGGAATAGACACGCGGGTTCAATTCTTGCGTGACAGTATATTTTGAGCACCTTCTACGATCATGGGTAATCTTGGGAGTATCGCATTACCGCTGAACTCTATTGGAAGGGAGGCCGTTGCTATGTGTGGGCCAGGCCGCGCATGTCTGCATTAGAAAACGGTAAAGCGAGCAGACTTAGGATTATGTATTGTAGTACCTACAAAAGCTGTGCACGGAACGGTCTTATGATTGTTGGATCGATGTGATTAGCCAACAAACGTGTGCATATCAGGCTAGTCCCAGAGCCGATAAGACTGACCATAAGAGAGTATTTGCGTCCAAGCTTGGCAAAAGGATGCATCTAGGTTTGGTTGCAACCGACCCCGGAACGGGCCTGCGGCCCACGCGTCGCATGGAAGAGGTGTGATGGATCCAGCCGACCTGTCGATGCCTTGAGCCCATAAGGATAGTTGGTTCACAAAGTGATAGGCTTCCCGACATAAAGATATTATACCACACAACAGTCGTGATGCACCCGATAAGCTCGACTCCCGGTGTCGCGTCTCATTAATTTCGGAGTGGAATTAACTCGCGACGGATTAGCAGCCCGGTCGCGCCACTCTCGAGTATTAGGACTGCAGATAACGGCGCAGAAGGGTTAGTTCCGATCTGCGTATAGCCTGATGTGCCCCATATATGATTGTAGGAGTCGATCGACCAGGTGACGTTCATAGCATCGGCAGATTG
GTATCCCCTTACTTTCTAACGTTGTGAAATGTCCCATAACCGATATCGGCCATCCCTTCGTATAGTACAATTGGTGTTCCGTTAATCTTCGGACTAATTGTTTTCTGAACTGGCCGTCAAGGCGGAAGGATTGAACCGAGACACAGCCACCCAAAGCAGATGTCCATGGGGCCTGCCTCCTATAAGAGATATGCAAATGTTCGCAGCACGCGCGGCTTTTTCGGGCATGCATTACGCATAAGTCCCGAATTTATATTGGGCCGTGTCGGGAGCAGAGACACGCTACACTAGGCACGCTCAGGATTAGTGAGGGAACCAGAATTTCGTGGCCTTTGCAGAGCGGACAACCCCTGTTCTCTCCAGCAGACGCGATCCGGTCTTTAAGCGTTCGTAGGATACCGTCATATGTCCTCTCTGGCTACTGCGGGAAGACGAATTTTTCGCCATGATCTTGTTCTGAGATCTAATAAATGGGCATCAGTGTTGCAGTTCAATCGTTTCTATTCGAGGCCCCGTCGGGTTCTATTGTCTGTCCTATCAATTCGCCGTTATAGATAGCTTACTAGGGACCGGAACGTCACAACTCCTGTTCTCCAGAAGTAGCGCCGCCTGGCAGGTCAAGTAGTTCCCTTCTTGATTGTGGTCGGTAAATGGGGCCTCAATCAGATCACCTGTTACATGTGCGTACCTGGGTGCATTGAATGATATTTAATCTTGTACTAACTCCCGTTGGGAAGGAGTAGGACACTTAGCAATCTTCACTGCAGGCACGGTATAATTTCATACATGACGGCTAGCTAGTCCCCGCTCTCTACAGAGGTACTCTGAGTAATCGCGGCCATACAGTTATAAGCACGCGGTAAGTAAATCTTCCCTCCTACCCGATTCGTTCCTAGCAACCAGCAGTGTCGGTGCCCGATTTTACAAGGGGTAGCAGATACACCCCGCGTCCGTGCTAATTAGACGCATGCAAGTCCCAGCAGCTCACGGCCTACCTTAACCTCTCTGCGAACGTTATCTTAAAATTAACTGTCGTTTTGTGAGTTCGCAGAACCACATGATGTTTGCCAAACTTATCATACCCTATCATGAGTACTGCATATATCTTAAGTTAGAATGAGGATTCGCAATTAATTCCAGCGATGATCACCCGTGAAGGAAACTTTCTTTACCGGGCACACGATATCCAGACGACTTAGCAGGGTCACTTGAAAGAACGGGCTGATCAAGCTATCGGTATGGATTAATGGATGTGTGCGTAACTTCTTACTACTTATGGAGTGGGAGTATTAATATGTTATCTTGCCGCCGGGCGATCGGCAGCTGCGCCGCACCAACCTCCGCATTGCTGTGTCAGTTGTAAAGATACTAACACGCTACGTTAAACCTATCTTACCGTATAGCCTATCGCGTTCGGGACATGCCCTGTCTCGGATTGCATAAAGTTATGCCACAGAGATCCGGTGCGTTATTACGATAGAGCAATCAGACCAGGAGCGGGTGCGCCCATGTTACACATGTTCCCTTCCGGTGATCCAAACTGACACGTTCAGCAACGACCATAGAGTCGAAGATATCCGTATACCCTACAGTTCGGATGTGATAGTGTTAGGATGACTGGGAAAGCGGTCATGGACGGTGAGGCACCCAAAGATCACTCCTGTGGCGGGGATGTTGTTAAGTCGTAACGCCCTATCTCGGTCAACCCTTAGACAACATCCTGGGGCTCCAACGCTGACGGCAGCTCTCCCGCTCACCCCAAGTCCAGTGCGTACTAATCACAGCCTTACTAAGCTTCAAGGGCCCCGTGATTCGGGGCGATCCGTAACCGCCACTGGCAACTCAGGGCTTGACGGCGGAACGGCTCCCCTAGAGGTCGATGGAAAAGCGGGGTACCAATAACCAGATGCCTCCGATCGCGGACCGTCACGGCCATAGGCAAAAGCTAGGAAAGGAGCAGATCCCTGGAACGGGCCAAATATGATAGGGGAACGTAGGAG
ATTGTGTGGGCTGATGTCTAGCATTGGCGCAATCCGATACCCGAACGTTATCTGACTCCTGTCGGCGTTCACAGGTGTGCAAGACCGACCTCAAGTCCCGCATAACTCAGGGTGGGCGCCATAGGCGTCACTCACGCAACTGTAGGGGCCCGTATCCTCGGGTCGTGATTTGGCTGGCCAACTCTGAAGATGGCCGATCCACACGTATTAATAATAGTAAGGCATACTTTTTAAAAGCGTTGCGCACTTTGTCTGTTGCGGAACAAGAGATGTCGTGTGCAGATTATGACTATAAGCCATGATATCGATCCAAATGCTGTATTTAATTGGACGCATCATGCTTAGCCCTGTTTATTTCTATATAATGTCACGCGCCCTCTCCCGACCCATCAATGGACTTCATGCCTTGTATAAAGGACGGCCATCTCTATATGTCCCAGGCCATTTTGGAGCCCTTTCAGCGCCCCCGGCTTTAGGCAAGTGGTTCTGACAATCGGATGTGTCCAAAAACAGTATTCTGTAATACTCTCACTCAATAAGCAATACCCATCACCGTTTAGTTTCAGCGCGCTCTCAGGTGAAAGGACCGCCGGAAAGTGTATCTTAATGATACCTAGACTGAGCAGCCAGCTTCGTTATTTTGCAAAGGACAACAAAATTGCACATCGGCCTACGGGAGACAAGGTGTTAATGGAATAGGGGTACGGAATTCTCGCGGGGACAACCGTCAATCAAAATGCGACAGTGCCGTCTCAGACATGGGGGATACAATGCAGTTGGTTAATGCCAAGTATCCCTTCTATGGCTTCCCTGTCTTAACCCGTTCATACTATAAGGAGACGATGTTTTTATGACATAATGAAGATGTGTATTCCCGTGCCGCAAAAGTTCAAGGTCGCTAATAATGGCGGATATTCTTAACCGGACCCAATCATGCAAGTGCGGTCCGCTGACTCCGGATTACTCCTTACGGGGGGCCTAAAATATACAGAGCCGCGTACGGTTGTTTGGAAGCTGAACTTACTCGGTGTCTTATGACTGCAACTCGAAGTAAGCTTCTAATAGACCGGTCGAGTAATGTCTAGTCTCGTACGGTAATCTCAAATAGGTTGCCAAAAAAGAAATGTTAACTACTGCGTTCGAATCGATGATCTCGATTGTGACTCTACCAAGGGTTCTTGTATGATGCTTTGTATAGCCGATGACAGATTCCTCACGTATGCGTAAGTGGAACAGTATTTGGCGTACGCGGGCGAGCGGTAAGGGGCTCGTGGACGAGCGAATAATTTTCTATATGGGAACTCTCCTTGCATGCCGGTCCAATAGGGCGAACCCGACAGATTCTGTGGCAAATTGTGGTATAAAGTTCCAGCTAAGGGTTCCCCGGGGCCGTATCGTAAGAATAGGTGTGCACAGAGTCCAAGGCCCCCTGCCCGTTATCGTATGATCCATTAGCTCTGGGCACAAGTTGGCTATCATCTGGAACACGCGTAAAGTGGACGTTCAGACATTCGTACGTCTCAATGCGTCAATGGTAGAGACCCGGCGTGCAGAGGTGTCATACGCCATGCATCGGCGAGGCGGATCGGCGTTAAGGGGTGCGTCGGAGACGCGCAAAACTGATTGGACTTTAAGGACGTTCACCGCCCCGCTATTCCGCGAACACTGTTCGTCGCGATCTGCTTAAGTTGCGGAGAAGAGGAAGTAACACAGAGTAACGTACATCGCTCGGATTCAGGGACGGTATTTGATTAAAAGGGCGGTATGCGCATAGTTCAAAAAGTTGATAGCTACGTTGCATCGTCGACCATTTGCAAGACGTACGCTTTCCATAAGCTGGGGTTTGGTAGGATTCTCGTTGTACGGCTCCCCGACTATGGATAGGGGGAAAGAATTCGGCTCCTCTGTGCCTCTTACACTTAGGACCAGGCATCTCCTACCGCCGAACATTTTGACGCAGTATACCGGTGCGCTACGGCGAGAAGACAACGACGCTTAAT
AACGGCTGCCCGCTCTTTTGTAATTGGGAGGGGATCATCGTCCTAGTAACGAGTCAGGTCGACGTTAAAACAAGGCCGCTAATATGTAGGCGAGTACCAAATGTGCGGTAGAATGAAGCCAGATCAACTTCTGCCTGTGTTTTCATTCCAATGTTTTACCGAGGGTCCGAGAACGTATACCTGAGTCGCTAGTAATACGGCTCTTCCATTATTTCTGGTCGGTACCGATCGCGCCTGGGCTGGTCGTAGCGCCAGATTTAACACTGTGAGCGCGCCCTCGTACATCTAAACGAAAGCCGCCCAGGACCATTTGGGACATGAGGCAGTCCTTGGACAGTAAGTACCTTCCTGCTGGCTGACTATCCTTCTCATCTTCCATACTAACTCTACGGGGCCCAAATATCTGTTAACGAACTACTGTGAGCCTGACGACTCCTGTTAGTATGATTGCACGTGCTAGGTGCAAGCGAACCTACCGCTCTGCGAGACTCAGGCTAGAGTGTGCGTGAGTCTTCAGGCAGGTATCA +4 ATCCATGGGATCAAGCAAGTGATATCCCCCGTATCATTCATCCAGGCTTCCGTTTTAGACCTTCGATGCTCCCTAGGATGGTATAACGCTAAGTGCACGCCAATACTTGTGTCCTAGCGCTCGGACATTGTGGGTACTCTAATCAACATCTAACAAAGGGCCTCAGATGCGCCCTGTTTCCGTACCGACTCAGCGTGTCGATGTGGCAACCTCTACGGGTTGCCATCCTTGGAACCCCAACGCACGGTAATAGTTGATGCGAACCCCACGGTTCGCACAAGAAGAGCATGATCTCCAGCACCGCATCTAGGTCTAGATAGCCCCTTCCTTTTAGTGAACCGTACCCACCGACGAAGTCCCGTTCCCTATTAACGCAAAGGAGCTCGCAGTTATAGCTAAGGTCCGTAGCACGAGCCGTGATGCAGCTTGTCAGCCATAGGAGGAGGGGCAGCAGGTAGTAGTAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCATCGTCACATTGACCAGGACTAGATCCGCCCGGATGGCAACTAACGAGACTCAAGCTGATTGCACGGTCCTGGTCAGCATTCACTGTACCTCCAATTTCGCGACACAGGAGTACGAATCCCAACTGCGGTACACGTGCGTATTGCAGCCAACGTAAGCCGGATATCTTGCAGCACATATGGTGCGATTTTCCCCGAATGAATGTTGGTATAAACCCCAAACTTCGGGCGGATGAGCTCATCGGACGTTCAACGATCTTTGCGACAAAATCACGCGGACATGGGTATTATGTGGCCCTCGGTCCGGCTATTTCTGCTACTTAAAGCTATCCCGTACCCTCTTTTGAAGGTGACAAATGCGGCGGGATAGGGCCTTACACGAGACCGAACCCCTAATGCGTACATTAATGTCAGATTCGCTAGCTCCACGATGCTAAGACGGTGGGCACTCAAACTGAGTGCGAAGGTTACTGACACCGGGACCAAGTTCACAGTTCCGCCCCATGTTAGACTGTTCTGTGAACTTTGTCTGGTATTTGTGGAACCGAGGATATCTAAGCCGTTTTTTAATAAAACATCAAGAAAACCGTCCCAGGATGGGCACAGAACGAGTCGAGCGACTTGCCGCCCGTTTACTTGTAATGTATCAAGCCTCATTTCTGCACATCATTTCAAAGTTTATGAGGAAGCCCCAGACGGTCGTGAGTTAGTACCCATTCAGTCTACAAAGTCGAGAGCACATGACTATACCCAAACACGTTTAGTTAAGAACGTTTGTACGACTGTTTGCTGTATATGTGACAACTGTCGCCTCAGGTTGTACACACGTTGCCCACACCATCATGACCTTACCTCCCGCTTTTTGAACGAAGTCGCGCCACCTGTCGCAAGTAGTTGTGCGAGGGAGTTCTATTTCCGGGATGATCATGAGGTAACCATCATGCAGCTAAGAAGTCAACACCGCGGGAG
AGACAGAATTGTTATAAGGCGGCGAGGAACCCTTTCGGGGTTTCCCACTGTTATGTCTATGCATAGGATTGCTCGGCACGCCGCGTATTCATAACCGTCCGTGAATAATGTCGAGGATATGTCAGCCACCCCGACCGGAGACGTTACGCTTAATCTAGCTAAAGTCTAGCTCAACCGTAAGTACTCCATGGGTTCTACGTGATTCATTTATTTCAACAGTCTGGCCCTTGAGGGGACAACTCCGATAGTAGGGTATTAAGTCCCCGCCAGTATTTCCCTCTCTTAAGACAACTAGGCGGCCTTTCTCGAGCCATTGATGGGATAATTGTGCTAAAATGTAGCAGATATGTACGTTCATATACACCTCGGCCTCATGTCTGTGGGATACCAAAGAAGAATCTAACGAAGCCTTCGCCTAGGTAATTCAATAATTCTTCTAAATCCCCCTGCTTGGCTATCCTAACCTTACGCATGCGGCTGTAATCGATATGAAATATGATTTGGATGTGTTGGCGCGGACGGGGTCGTGGCGACTACGCCTCTTTGCGCTTCACCCTTGGAATTTGCCAAAATTTGCATATCTGCTTCACCCCTTGCTCCAGGGGCCCAGTCATAGCGTCCGTCGTTGACGGATATCCGAGTGATCTAATCGATGTTTGTGAACGTGAAAGGGTCCGCACCTAGGCCACCCTTCTAGTTTCTCAGCGTCTCAGCGTCTTGCCCCAGGGCGACGAAGGACAAAGTATACTCACGAGGCCCATATGGGTTAAATCTTGAGCATCTGCAAGTGCATATCTTATAAGTTGCTCACTAACTTCCACGTGAGAGTCTAACTAACGAAACCGATGCAGAGCGCTCTCCAGTACTTTCTGCCAGACTTCAGCGGAAGTCGGCAAAGTCACCCAATCCTTACACCGTTCTATCTCCAATGATCCTTTCCGACTTCTCCCCAATCTTTCGTAGGACAGGTTGCCGGACCAGGACAGTGGATTGAGACTCCCAGGGCGATTGTGCCGCGAGCGCCTCACAGCCCAGAGTGCACGAGTGATTATATCATGAGCACTTTATCGTCGGCCGATGTGGGGCCTCGGCTGATCACTACATGAATGGGCTGGCAGCTTGATGGCAGGGTACCATTGAGTCTCCATACCTAAGATTCTGATGTATTGGGGATTGGGATAGAGTAAGCGTCAGGAGACCGCAAGTGACTAAGTTGGATTAGCGTAGGTGCCACTGTTATGGTTACCGACTAGACCTATTTCGCTGAGTCAGGACCAGATATTGGTTCTGTTCCACTCAGGTTCAAAGATAGTGGCCGACACGAGGTTTGGCCCTCAGGCCTAGTATGTGATGTTTCCGTGTAGCGTGACGCTCGGCTTCATCGTACGTGTCCAGCTCGACAATTAGTCAGTCACGGGGAGATATGGGCCAAGCGGCATCGCAGCCAGCCCTAGGAGCGCCGCGTCCTTCAATCACTGAGGGCCCGTTAAGGGGGAAAGGGTACATATGAGAAAGGGGGAACGTAGTCAGGACCTCTCACTCCAGATGGCGGTGCTAGGGGATGAGACGTCTAGATTCAACTTCTTGCATTTCCTAAGTGGCTGTTCGAATGTTTTGGCATAGGAGTGAGAGCTGTGGACCCAGGTATGGTGTCGACCTCCGGCTTACTCCATAACGGAGCTTACTAGTGTCAGACTGGTAGCGATCGTGCATTATATCGCAAGACTATTCCGTTCCGCGCATGCGGAGACAAATTCCTGCTTGAACTAGGCGGACGCCTACTATTCGTAATCACGACGTAAACGCGCTCTGCCAATTGAGCTAATACTTGCGACTCTTACAGCACAAATAGTAGCTTGTGCCGATCGCTTTAGCTATGTCCATGATCACACCGATCTGGCACTGCAGAGCACCGCGAGGCCATCCCCTTCGTGAAAAAATTGATTCTATCGCGTAAATCGTCGTTGGGCGTCGCCGACGATGGCAGCAGTGCAATTGGA
GGGCTGAGACCGATAGCGCTAGGAGTGCTCCACCTTTGACGGTCTCTCTCGTGCGGATAACTGTACCAGTTTCTGTGTACTGTTATCGTTAAGCCATCCGGACTGGTGTTTGTGGACTACGCTCCATTGATTCACCAAAGGGCAGGATTCTAACATCGGTTTTACTTAGAATGATCTAATGACTAACGAGATAGTCAATCACCTAGGGCGAGTTATATCAGCCCTATTCAAACTTGTCTATCACATACCCGTTGATCCACGGTGCCGTACGGCTCCACAACGCTCTCTGGCTACTCGAATAAGATTTCCAATATGCCTTGGGAATTCTAGAATATACCGAGCCTTAGTAACTGGAGACCCCACATGGTGACACGTCTAATATCCCTTCTATGTAGAAACCTTAAGAGTAGGTTGCTCTAGGAAGGAAGCTCTGCTGCGAGTGGTGATAGTTAGTGACCCACAGCTAGCTGCTGACGTAGAAACATACCCTACGCTCGCCGTTAACGTAGATGATTATCACATAGCACTCTGCGCGGACACTCAGAATAGGCTACGTAACTCATACTCTGACCCGGCGTCAAAAAATTTGTTTTGCTGTTTACTCGCAAGTCATTTGGCAAGCCCGAACCTTACTAAGCAAGCGTAAATAATGCGTCGGCGCAGTCTTCCTCCATTGCCAGACAACAAAATACCACACTAAACAGCCGTTTCATTATTATTGCTAGTTCACTAAAATGACGGGGGTAGAAAGCTTCCCCCGAAATAGTTGTTCTTCGGAATTCAAGGCAGCGTAACAGTCCACTGGAGCTCTGGCATCCCGTCACGCCTGTCTAAGCCCTATTTAGTTACGACCGGTTCACACTTAATGCAGGCAGCGGTCCTCGTCACGAGATGGTTGGCATCAGCCATTTTATGCGCCCTTAGTACGTATTATGTTTGGCTACCGTCTCCGATGATGGTAGCTCAGGCCGTGGAAAGGTGAAATTTTCGGGCTGCCCGGTTGAAATTACCCGTTCGCTTCGTTCGGATGACTCGCTCCCAAGATCAATGTCGTGGGAGGGGAATAGTGTTCCAAGTGATTGCCCACGCAAGTGACCCCGATTTTCAGTATGTAACAAGGTTGGCGGTTGCACGGGTGAACTTCCAACTCACCACCCGCTGGACGGCTCTAGCTTGTGCATTGCCTTAACGAGGACGTCCAGGCCCTGCCGATTCTAAGAAATGTCAACCAGTTTCATGGTCATGCTGTCGGAAGGGATTGACGCGCGAGCATCTCGTAGGTGAGACAGCTTTATGCGGCGACCAAATTGGTAGATAACCAGGCATTATAGGTGTAGCGGGCCCGTTTCAACACAAACTGACACAGAGGTCTCCATCCTGAGGCCCTGTGATGGTCAACCCTCAACCGTATGACTTGTGTGCTGCAGCCTCCTTCAAAAGTAAAATATAAGAAATGTCGGGGGGGTTATACCGTGTTATGCCGCCCAAATTCGTCGGACTCCGTGTCGTCGTCAGGTGTGCCTTCTTCGTTACGATACGATTAGAGGGAAAGGTTCCCCAACTCGGACGCTACACATGTGGCAAGGGCAACCCTCGTAATACTAACTCAGATTGGATCTTACTGACTTTTTTTCAGGTCAAGGCGTAGATGTGTGATGCACGGATACACGCACTTCTAACGGGGCGACCGATATCACCCACGTCGCGGAAACTCAGCACCCTTCCCGTTTATCTTCAGCTGTAGAATCCGGAGCCGACTCCCTACCACAGAAACGGCATCTAGTTACGGTTTGCGAGCTCGGTCCTTGTTCTGTGTATCACTTACGCGCGTGCAACGGCCGAATGTACATGCAAGGTGAAAGCCGCGACCAAAGCCGACTAAAGGCCAAAGATGTAAAGTCAGCCAGACCAAAGGCAGCAGTACTCATGCGGTGTCAGAAACTACGGTGGACTGGTGGATAATACGACAGGCAACGGCAATCACTCATGATAACGCCCTA
AGCGACAGTAGGCATTCTACCGGCTCTGTTCCCGGGTGACCGTGACCATATCATTGATGCGCTGTCTTGTCTGTCACGAGTACCTACATGACTTATGAATTCCTGCGCTCGTGACATCCTCTTCCGTACGGGCGACATAAGAACAACGCAAACTAAAAAAGCCGTCAAGTAACGCTACGCACTGACACAACATGGTGATAGTGAGTTTTCCCTAGCTAACACGACTTCTAGCCTTGAGAGGCGGCGCCTGTACGAAAGTCTATAGCCTTTGCGTCGTACATCATGGGGGACGAGTTTTTATCACTTGAGCTGGTACATGGTTGAAAGACCTCGCTGCCCTTTAACCGCTGATACCACAGTCCCATGTCCACAAAGTAACGAAGCGGAATAATAGATCGTGTTGACATCGCCGAGTGGTAAGGAGACATATGCCCCCGTGTTAGATTGAATGCCTCTCCCTGCTCTCATTGGCTACCTCACTCCTAGGTCCACTTGGATTAGCCCCTGTTCGCCCGATCATAACTTTTGGGTTATTCATTTATTCTATCAAGGTATCACCCAGTCGATGCTGGCCTGCCTTCCGTTGCTTCTTGTAATGTCTTACTCTTGCTTCCGAATAGTCGCATTCGGCATTCCACTTGTCCTTTGAATGCACCGTCTCAATTGCATCCACTTATACTTCAGTGCTCCTTTACTGCGCGGTAGTCGAATTGACACTGTTCAGTTTAATAAACCATGCGAGAGCTCTATTAAAATAGACTTCTGGTATGAAGGGTATCGAGCAAACAAGTATACAATTCTACCATAGGATGTTCATGAGAACGTTGCAGGTGCTGCCAAGTCCGCTCCCGTTGTGTTGATACCTCTTCTTCACCTTTATTGGCATGGTATCCTAACGTTGGATGAAAGGACTCCAGGAGTCTATCCTATTTACAATTCAAAGAAGAATTCAGTTTGACGTATCCTCGTGAGACGTCCTGCATCGGGTGAAACGGTTTCACGCATTCGAAAGAGAAAGGCAGCGGTCTTGGGCGAACTGTAACGTCCAATAAGGCTAATTCTCGCTCTGCCTTTTATTACACTTGCTACTAAGCGCCGGTAGGCAGATCTATGAATAGTTATCCCAGACATAGACGTTCGCATGCGGGCAGTTGGCTCAAAAGAGGTGGTACCCTGGACTACAGGCAACCTATTATCTCTATTTTGGTCGAAGGTGATCCACGACGAAGGGTCAGTCGCCTTCTTTAATAGCTGATCGGCTTGGACTGGATGTGAGTACACCCATGCCTAGCGTTTTGCGACGCTTGCCGCCCGGTAAGAGATATGACCAAACAGTCGCCAACGAGCAACGGTGAGGGCAATCACTCAGTGGGGCAAATGCAGTGTGGACCGCACATGCCTGGCTCTCTAAGAACGATTTAGACCGAAGAGCGCAGGTCAAAGTAGATTGTACTCGGAACTAAGGTTCTCGGGTTGAAAATAGTAACTCCCTGACAAAAACTTGAATTTTGCTAGTCATTGGCGGCCCAAATCACATTTCGACCTGCAGACGTTTCCGGTGTTTGTACGGTACTTCCTGCGAATACGGGTTTTCGTCACGAAATTTTTACTTCAACATTTAACCGGGCATGAACCGATAGTTAACTCAATGTTTGGGTCTGATCCCGCCGGGCGAGGCGGCAGGCACTTCGTCGATATAGCATGGATCGCTATGATTGAATAAAGGAGTAGCCAGGGGCGACCACTGTGAACACCTCTACTGGTCCAATCCGTGTTTGGATTATTAGCAGTGGAGTAGTTCTGCGGTTCTCAACTTTCAGCCCGAATTGGGTTAAAAGGTATAGCGTATGCGCCATGATATCGATCAGAAATCATCCGGAGCAGGTACGGAGTTGAAGACGTAGCTAAGTTCCTTTTCGACAACGAGGTCCCCGAACTAGGAGAAACCCTCATACTATTACGGGGTGGTAAGCAGCTTGCGTAGCGGCCTGCCATCCCTG
GTACTTACCGGAGAAGTCGGGTATAACCTCTATATCTTATGCGTTCACCAGTAAGCTTTGGGTCGAACTACAATCCTTCTACGGAACATTTGAAAAAGCAGCAGAGTTGAGTTCTAGCCTGGAACGCATCGACTCGCTGTCCGATCTCTCTTTTGATCAGTACTTGTATCTCGAGCTATATCAGACCGGTGACATGCGCCATATTTGAAGTAGTATTCCATGACGTTGGGGCTGCTTGGTCTGCGCCTACGTCTAGCACTATCACCGCCCTTCCTTGCAATTACCTAACCGCTTAGGGTAGACCTTGGGAACACGTCCGCCTCGTTGAGTACTTCGAGATCCGTGAATGGTCGGTGATCAAAAAATCGCCACAGAGGCGAAGATGTGTAAACAACCTGCAGTACCAAATGCTGAAGATAATTCACAGCCAAGCCAGACGTTGGGTCTCGCAACTGACGGCGTTTTGATTCCATTACTTGCATAAGGGAAGCGGACCGAAGACAGTCTCACGTGGGAACGGTATCGAGATCACTATAAGCAAGTAACCATCGTAAATCAAACGATCTATTAATAGCTCGAAACCAGATGTAAGATGGTCCTTGCCCTGCCGGAATGCAATCACGAATATTGCGGGTACACTGTATTAGTATCGATCATGCTCTTTTCCCGTGAAGTTACGTAAATTTGTATATCCGGGAGCACGAGCAGAATGACTTTTATCCGGCCCGGCGACTGTCCGGAATGAGTGAGTACAATACCAACTGACTGAGTAGTATTTCTGGGACTCGGTTACCACTTTTAAGTAGCCGCATGCGAGTCACGGAGGTCTCCGGTTCAGCAAGCCTGATGACCCTCGGGGAGGACGATTCAAGGCCATGGATGAGAGTGGGCCGGTATTGTCAAAGTTGCGTTATTGAACCCGAAGCGCAATCTTGGTCTGTTAATGTATTGACTAGCGTCGGGAAAATTCTTTAACGCAGAGGCGTCTTTGATTATCAAGCGCTTGAAACTTTTTGGCGGAGTGTCACCACTATTCTCGCGAAAGGGATTAGCAAATTGAATTCAATGAGGGCACTGTCCTCTTAGCTTTCACCCCAGCCCTCTGGGTTTCCGCGTGGGTGACTATGATGTTCTACTCCCGGTCCGTGTTGGTGCACTCATGGTGAAGCTCACGGTGCTTTGGTCACCCTAATTACATTGAATTATGCATTGAAGTATGCTTGTCCATCCCTTTTATGTCAAGACGGCGCTCCGGTCTACGGAATATTACTACACCTACTCTCTGTGCTTGTCAGACTAACAAAACGACTGTGGTACCATATTTGAGACGGAATCTTGAACAAAACCACAGTCACTCACTAGAAGGACCTGTGGCGCGATTGGCCCTTTGGTGCAGAGATTACTCGAGGTGATCTGTGGCGGGAACCGCGAGCTATCGTGGTTACTTATAGCCGGGCACGGATCGCGAACTCGGCAAGACAGATGTCGTAAACACGTAGTGTTTAAGGTCGTCTCCCGATGCCCAGATTTAACCCCTTTCGCCTGACACATCCTTGATGTCCGCCGTCCCGAGCAGCAGTGCACGATCTACGTTTACGCCTCGCGCTAGGGGAAGGCGGATGGACGATGGTTTCACGTCGTGTATTGAACTCTAATCACTCAGAATAAGCATCCGTAAATCCGCTGGTAGACACCTACGCTGAGGCCACATATATGCTGTCTGGCATCCAAACTATAGAAAGCAAGTGGGCTGCGCCGGTAGGAACTCTAGCAAATCGTCTAGAGTAATGTACGGTGTATGAGGCCACGGGCGATACCTGTACTAGAATACCTGGTGTCTGGCGCCGGGCGGGGCATGCACGGGAATGCCCATCTATTCCCTGATAGCCATGGGCCCATCCTAGAGTTTCATATATTCACCCTAGGCAGATTTTACCTTAAGCATAGTTCTACATTCTTAGACGAAACGTTGTTCTGGCGCGTTTCTCATACCATGTATG
CTTTAGTTACACCCGGGGATGGGCATTGTATCCCATCCGGTCTTCTTGGCGCCTATGCGGCTTTGATCAAAAGAGGATTTGTCCACGCGTGCCGTTCCGTCATACGATATGGCAGGCCAGTGCCCAACGAACTAAACCATGGCATAATAATCGCCGAATCGGTGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATCAGCTCTGCTGAATACGACGTTTTTTGCTTCGAGTGTAAGACTCACGTATCGAACCTCCGTCCTTCACTTGTAATGATGGCAGGCCGATCAGTAACATCGAATCGTACTATATCAATTGCAATACTTGTAGATGTCCGGCCTCGACAAACCAACTGTAATCGGACCCTTTCTATTTGGAATAGAGTGCGCGGTTGTTCGTTGGTTTTCCTTTATTAATGGGAATTATCAGGGTAAGAGTGTACATTTACATTATAAAGCAATGGTGTCTGGTTGTAGTGTGTCCCACACATTCACTCGTCATTTTAAGCTTTACTCAGCCTGTTGTACAGA +5 CTTCACCCGGCCGCTCGCGGCTCGTATGTCCGACAGCGACTACACCATTGTCCCTAACTTTAAACTGTAGAGCGAATGTCAACGTGGCCGTACGTCCCCGGGTATGATCTGCGGCATCGTTAGCGTGAACATGCGGTAGCCAAAACCTTATTGACCAACCTTTGGGAGGAAACCGCCTTTACAAACGCATTCTCACGATACATTCGGGGCGTAACTGTCCGAAATGATCCAAAAGAATCGGGAATAATTAGTACCTAATCAATTCGTCGTCTCGTAAGAAGGTAGCCGTTGTGTGGCGTCACTGCTGGAGGGCACTATCCGCAAAGGTTAGGCCGGGTGTACCATAGCATCCGAATTCATCCGATTGCGGTATTCCAATAGCCGTTGAATTAGTGGAATAATTTGAGGCTGGGTCTGATCATCTGACTATTATGCCTGACTCCTGCGGGATTTGAGCATGAGCGCGCGATGAGAGTTTATAGGGAATACTGGAGCGTGCAAGCTGTGCCGGCCTCTACTTTATCAACTTGGGCTCAGGGCAGGTTGTGCCGATTGTGTGTTACGTTTAACTCTGTGATGGGTCCAGGCGTATACGCAATCCGTATACTATAAAGGTACAACAACTCCACGCGACTAGTGGGGATTAATATATGACACCGGTCTGCGGTAGGGGAACCCGGGGTCGTCGGTAAACCTCGCTCTCCTCTGCTGAAGAGACGCCTGGCCACATGTCCACCTCAGCGGGCCGTACAAAGTCTTACATCAGTGATCTGCGTGCTACAAGAGTGGTAAGTATGACCTCTCTGAGTCGATTGCACTGCACTGCCGCTTTGTATGAAGTACTAGCACTGTAATGGTTTGAACAGGGGCGGCTACGCACCCGCCATTACTAGCTACTCTTTCACGACTTGATGGCTAATCGGCAGGCCTACCGTCCGCGGACGAACTCGTATTCGGAGAAAGGTAAACGCTAAAGCCGTTCGGTGCCCTTCACAGTCATCGACGTCCAGTATACCCATTCTGACTGAGCGGGAGGTAAAGCCGTGAGTATATCCGTTCGAACGGCTATTCCAGCGTACATCTTCCCCGTAGTAGAACCGTTGCAGATGTACGTAACTTCCATTATTATATGGACTCTGAACTAGGTAACGGGTCCCGTAAAGGCCGAGATAGATCACTGAAAACCCAACATGTCGCTCGGGTCCCTAATCTAGCCTCGTAGAAAACGGGTACGGAAAGGACCTATCCTACACCAAAATCCGGCACATTCAACTCAGACTTATACGATTATTAGAGTAAGTGTCCATTTAGCTAATACCTCGCGGACCCCCGAAGATACGGCGATTGATGGTAACTCGGTTTCGCATTACATCCTTTGTCTATTCGACTAAGGGATCGTCCCAGGCCCTGAACTCCAGTGAACACCCACACCGGCATGCCATGGTACACACAGTCGGAACAACTA
TAGAATGACGGTGGATTAACCCGAATGGAGTCCGCACCTCAGGTGTACTACAGGTACTAGGTTAGAGCGAGCTCCTCTTTTTTCTTATGGTCAAGGCAATGTACAACCGTCCGATGATCGCGCGTGTTGCGGCTCTTTTGGTCCACAAGGCTCAGGAACGGGCGAAATATCCCCGAGCGTGTGGCAGAGCCTAGGCGTGACTTACCGACGGCGTACCGGCTCATACGTATTTTACCGGGCGCTATATCATAAATATGTAATAGAAACGGAGAATCTTCCCGCCGGTTGGTTTGTTAGCAACTCTTTGTATATAGTTCTGGGCGCAAGTTTACAACAAGTTAAACGGGATTGCGCGCGGTCCGCTGGTATTAGATCCGGGGATTATAATCCGGTACGGGGTACTAAGGGAAAGTCTGTTTCGGGATTCAAGATAACCGAACCCGCCAACCGTGTAAGTTTTACAGTTGCCCTGCTGAGCGAGTCGACACCCCTTATTTAAGTATGTCCTAGCAACTTCGAGGCAAGGCAAAGAGTGTAACGGGATTAGGGCCTGACTCTATTAATGGATTGTCAGAGGTAATGGAAACCACCCACAAGGGTTTACTATGTTTTGAGACTCTTGGCCGACGTGAGCGAGCACAAACCTCCCCATATGGGCAGTCGACGTGTCTAGCCCGTGCGCCAATGAGGCAGGCGCAATGGTCCTACTTGGAGAGCCATACATTAGCCGCCGCAGCCGGACCCAGCCGCCGTCTTTCCGACGAAGTTATGCGAGCAAGCATATACGCCAGGCCAGGCTCCCAGCATGAGCCCAAAGTTTGAAGGGCTGACAGGCATCAAAAACACAATGTGTTACCATCGACCCATCGAACTTCGCTCGCTGGGTCGGTCAGCCCGGACTAAAGAGAGATAGAGAGTTTAGGGCGTTCCTCGCCTTGATTCAAGGGGTAGGGCGCCTACGACTTGATGCACTGTGTTTCACATCAGTATGCGGAGTTGTCAGCCTCAACCAATGGGAAGCCAACAACTGCGAGCACCGTTAGAGGTGCATGCGGTGTCCGCTAGACATAGTAGTACCTTCAGGCCATGCGATTTGGTTTTTGAAACCGGTCATTACGCGTAAGTGAACTCGGCCACAAGTTCGCCGAATGATCAACTTAAGCGTCCCCCACTTCAACTGCTGCACGCCAAGTGAGAACCGAACCTCAGGGTAACACATCTGGGTACACACGCCCTCCGTATTGCGAATCTCTAACACGAGTAATGGCTCACATCCGAATCACCGGCTCAGGTTAAAACTGGTGAACTGTAAAATTCCGATTCTTGGTCACTGAATGCCCGCGCTACACTGGCTAGGCCCGCTATTTCATTCTTGTTGGGACGTAGGGCGTCGCGTAGGGGGGTTGTTCACCCATAAAGCGTAGCGCCAGGTGTAGCCGGCTTTTGTTTCAGGATTATACGCAAGTGTTGGCCCCCTCAACTGGGCGTCAAGGTCGGGATGTCATTCGCTAGCAACAACGTTAGATTACCTCCGTCACCACCCAACCCTGTCAGGTTCTGAAGCATAAGGGGATCTAAAACTAACGTCGGTCCTAGGTCCAGGCACAAGTGTACTTTCGAGGCGACTTTTTCACAACGCAGCAAAACCTTACTGCGATCTGCCCTCATAAGCCAGTCAGGTCGATGCGGGATCGACGTGATTAGTTCCTTATGTGGAACGCAATACGCGTTAGATCTACCACCACGCATTCATCCATCGACGGAGGCAAATCGCTTATTTCGGTCTCGATTCTGTGTGTGGCCTTAGCGGGACCCGGATTGCGGTGAACAGTTATTTCGGACCTTGTCGGAGCCGGCGCCACGTGACACTCTCATTACGTGCCTGATGTGTCCGGATGTATGTACATAAAGCGTCCTTTGTGATGGAACAAGTCGTCGCAATTCAGGAGGTGAGGGCGGCGAGGGGTGTTGGTGACAAAAACCCCGTGGCCAGCTAGTTT
TAATTCTTGATAATCCGCGGTCATCGATCCGATCCCACGCTGTCCTGATTTAAAGTACTGGCTTAATTCCACGTAGGGGTTTTCTTGGCGTGTTCCGTTTACCACGACTGTGCGGCTAGATTACGATCAGCTAAGGACTCCGCGGTGTATCCACAACGCTAGTTATCCCCACGCAGGAAAGTAGCTTTTACCATGTGTGAATTGGGGCGTGGCGTTCTCCCGAAGAATCCAGCGCACAGTTTGCCCTATGGGTAGTCCGTACATCCAAGTGCCCGTGGTATCGTCTTGAGTAGCCGCGGTAAGTATTCATGTACGATAAATCGATTACCCCTCGACGAGTATAAACTCGTGCATCTACTTTGGGTCCTTACTGTAGATGGCGCCGTGATCTAGTCATGTCAACGTCTAGCCCGTCTGGTAGCTATCGCCATGAAACTCATTCGAAAGAAATAATCACCCACCCGTAACAGACTTGGAATGTGGATTTTTGCGGATACTGAACCAGCTGTCCGCCTGGGGGGAGAACATCCCAGAGGGCGAGCAATTAGCATCGCCTAGAAATAGTCGAGGGCCTAGTCGAGATCAGAGGTTTATAAAGACCGCAGGCCCTTGGCCAACCGTTACGTCCTGGCCATTACGACTATGTAGCTCCACGGTGTGTTGAAAGTTCGACGAAAGCCGTTGTCAAGTCAGCACGGATTTAGTACCGTAATGTCGTGGTCGTTCCATAAACTTCTTTCCCCCCCCGATTTGAGTGGTCGAAAGACGAAAATCGTAGCCCAGTACTGTCTCCAGCGGAAGCCCATAGGCATAAGAAACTGACGTCATTCCACCACAAATCAAGGGCGGCCTAGACGATGCTTTCGACCGAAACGGTGCGTGAGCGTACAGCAACCGCGCCCACTAGAATTAGGAAGAAGGCTGTTATTGATGCCACTGTAAATTGTTTTTTGCACATGTTTACACAATAAATGTCACGGGGCCGTCAATGGACTATTGCCAAAGCCCACTCTGGTGGGCCGTCATGTGGTGTCGGTTCAGCAAAAGGTACATGTTCGACTATTGTAACCGTTATTCGGCCTGGGGACACCTCCCCAGATATCCCGCACCCTTAAACCGCATATCGGCCGGAAAGATCCTTACGACCAGGGAGCGGGAACTTGATCGACTGGTTGTGTCATTCTCATAACGGAAATTAGCCAGATCCCGCTGCTATATGATGAAGTGGCGTAACACGGTCACGGGCTTACTGTCCCGACAGGGGCCTTAAAGGGAGTTCGCTACCGGGACTGACAGTAGATTGACACTGGTGGTAACATTTTCGATACTGGTGGTATTACTCGTCTCATCTCCTTTTACAGTTCAGAACGTGGTTGAAGGGCTATGCAGAGCACGTGCCAAGTACGAGTTGGCTAGGATGTCTCACCGTGGAGCCGCTTAAGCCTCACGGCAGAAACATCGATTTCTCCTTGGGCGATGTTAGTTGCCACTCATTCTATGTTGTGTCCGTCGTCGTCAAATCTAACTCTATACAAGCCTCTCTCGCAGGGGTTTGATAGAGGTGACGAAAATGACACAGACCATTGGCGGCAACAAAGCCTAACTGCCTCGAAGCTTGTCCCGAGCGTTTCTGTATATACATCCTAGCTCGCTTGAATTCAGAGTCACCGGCCCGTGTGGAAGGCCAGATGTTCGTAGGCTTCTCATCCACTACCTAATTGATAGCTTGTGTTCCAACTCCGTGACAAGAGTTGACATCGTCTCTCCACCCAGGTTGCACTCGAATCGTATAATTTGAGTCCTAATCATCTGGCTCTCGCTGCGAGATGTGGAACCGCAGACGGTCCTGCCCGCTTGGGGGACGGAGGAAAGCTCCCGTTAACTATATCGCCCGTCGTGTAATATGATACTCTTATTGTCGGGTGTAAATCGAGCCCGAATACGTTTTTTTGTCGTCTAACAAGCTCTACCTCATGCTTAGTATCCACCGCCGACGTAGC
TCTCAAACATATTTAGTACTCCGCGACGATGAGCACAGTCGTGTGTAGTTGGTTTACATTGATTAACATAATGCCCGTTGGTCGACATGACCTACAATACTATTAAACTGCCGACGAGTGCTATTCTTCGCGGCCCACTTTACGGGCAATCATTCCATTTTCGCTTCCTCGCAGTCCATAGCATTGGAAGCGGTAGATGCCCGATATGTCCGTTGGAGTCGAACAAACACCTGTTGATTTAGAACCCAAAGAGTCGGGGACTGTTGCGCGGGTTCACCTAAAGAGGACACGTACCAACGTACCAAGAGAAGTTTCGGGGGTTTGATAGCCGGCGATCAATCTGTACGACTGTTCTATTATCTAATTGGTAAGCACAGGATAATGCGCAGGGGAGGTAGTAGCGTTGTCTCTATGCCAGTGTGATGTGAAATTCTCGTTAAACGGTTGATGACCTATCGTTTTGGCATTCGCTCCCCAGCCAGGTGTAGTGGTAGCCGTTGTTTGAGGTACTTGTCGCAAAAGAGTCTGTGCCGTGGTTGTCGTGTCTTATCTCCGATATCTCATCTAGAGAACTTATCTCACGGGAGATTGCCGGCGGCTAACGGTCCTGTGCACTCCGTCGTGAGAGTATGGCCTGCTCCGATAGACGAGGTTTGTCAGTCTGTAACGCCGCCGTTTGTGCGCCCAATTTCAAGCCCATCAACCTCTAGAGCCTAGATAGAAACGCTTTGCAGTGAGGGTACGATGGAGGTATGTCTGGGGTTCTGACTAATTCACAGTAAATAGAGTGAAACGTTTAATGGACAGGTTGGGAAGATGTAGATGCCGCTCTGGGTGTGAAGTTCTACGTATCAGTATGAATCGAAAATACACTCTATCTCATATGACCATTAATAAGTCTTTTGCAGGTGTCGCGCTTGATCCCAGCCGTTATGGCCCCCTTTTGGTCATCCGGACAGAACCTGGTATTAAATATATACCCATCGCTAACCGCCCATAATCGTGTGATTCGACCGAGATCTCGTGCCTATCGCTTATCTGTGCATGTTGTCTGTAGATCACCTCTTTTTGAACGCTTCCTTGCAATCGAGAGTAGCACATCAGCAAAGTAGGTATCTCAGCGAATAAGGTTTATAAGATATGTTGCCGTGCGCGTCCCATTTGGCAACTATCTAGAATACTGCTGCGAAGCGGCATAGGTTGGAGGAGCGAGCCATGAACGGGGTTAAGTGGTCTAATGAACCTCGGACATGAACCATCAGCATCTCTTCATCCCTGTCCTGGAGACCTCCTGCGACATTAAGACACGGAACCTCGTTAACCATCGGTTGTCGGCGGCTCGTGGAGCCTGGAAGTATTATCCTGTGTCTTATCGCCGCTTCACTTGTCTTCGGGCCGAGGTCACGCGCCGTACACTAGCGGCAGTTGCTGGTGCGGGGTTTCATCTCGCATCGGTTGAAATTACTCAAGGTCCGGCATTGATACAAAAGGTTTTCGTGATCAGATTGACACGACCGCTTCGACCAGTCAGCCGTTGTACACTTGAGATTCCCGTTACCCGCTTATAGAAGCGAAGGTATGGGATAAAGAGATTGACAAGTACGAGAGACTCGCGAATGGTGTTTCTGACTTTTACCCTAACATTACTGGTACCCAGACCTGTATATACCGGCTCTACAAATCGGGTTATACTATCTAGTCTTCTGCGATTAGGCGTACAAGAATTCCCTAGTTCCTCGTTAACCTAGCATCCGGGCCTGTTAGATTTGACCGAACCAATCCGTATTCTTTCCATGCGTTCCGCTGTTAAGGCCGGTCGAGGAATTCATGCCTAGTATGCATCCTGTTCGGCACGGCGGCGCCGCCGTACGAAAAGATACTGAATCGACTCGTGAACCCAATGTGACTGCCCCCGTCGTCGTTATTTCGGCTCCGGTGATACATAAAGATCTATTCGGCGAACCGCGGGGCCATCAAATGTATCGTACTCAAGCACGGCC
GCCCTGAGTGAGTTCCATTATCATGCAGCATGCAAGTGGCGAGTGTCTCGAACCACCAATTGCACCACGCACTCTCGTCCTCCACTACACACCCTCATTGTTGTTCGGGCCTTCGCTCTCAAATGCTGCAGAGCAACAATAGCGCTACCAACCGCATTATGTCCAAGTGTTCTTTGGGGCTCACTGCTCTGAGACAGAACGATGAGAGGACCTCTGAGGTAGTTCAAGATGCGGCCGGCGTGAGCACGAGCACCCGGCCGTCCTTTAGACAATCTTGGGCAGAACCATTTACTATAAGACGTTCGACAGGTGTGAAAATGTGTTAAACTTAGACGTGCCCCGTACTGTCGTTGAGGGTTCACACATCGAAACAGTGATCGAAGAAGCGGATTAGTAGGGCCATGGAGATATTGGTACAATGCATGCGACTCAGCTGTTCTACAAGAATGGGCCAGACTCTACGAGGACCGCTGCCCACGAATTTGACCTCGGAACACGTCCGTCGGGCCCAAGTATCCCAAATCGTTTTGAATATCGGCCGCTCTGAGCCGACGTCCCAGTCCCGACGGTTGCTCCGCTGAGTGACGTCGACTTGTAAGTATTACCGAACATAAGCCCCCAGTATAAATCATCTTAACCACAAAACACCAGACGGGTAGGGCGCTTACGCATACCTGACGCCGTAGACTCATTGGGGATAACAAAGGGTAATCCGCTCTTCATAGGCGACACTGACACTGGGGCTACATCCAGGATTGAGAGCTTGGCAGGGATTAGCGCCGAGTTGTCCCCCATTTCATACAATGGAGATAATATGCACATTGTTACAGAGATGCTAGAATTCTACAAATCATAGTTTGGTCCGCTCCTAATCTCGGCATTTGTGCCGATGGATCGGTGTCTGCCTAAGCGCGTGGCTATTTATCATCACTATTAGATGTATTCGGCAAGTGATGGTCCTTTTCTGGCCTCACTCAAGATAACCTCTAGGTTACGTAGATGACTAGTACTAGTCCTCAGCATATTCAGCTAACGCCGAAAGTCCTTTCTTGGTACTTGACAACATCACTCTGTACAGCTTTCCTGTCGTTAGAGGGGTGGGTATACTTGCGCGAGAATTCCTGCGCCGAGGCGACATCCCGCCGTCGGGACATCTCAGGTATACACGACATCGCACCGTAGGGCCCGGTATTTGGGCTATGCCTGTTCTGCGCCAGCATGGCGCACACCAATGCGTTCGGCAAAGTACAGAGTCTATTATCATGGCGTTTGGCCCTATCGAGTCATTATGATGGACCGATGACGGCTTACAGGAGGATGGGTGTTGTTTTCCGCGGCATCCCAAAAGCCCTTAGACATGTATCCGTTGTTATGCGTACTGCGTTATCTTTAATGTTATTTCAGGAGTATCAAAGAAGTAGTGTTGAGTAATTATAAGCAGACGACCGGCAAACAAAATTCAACGCGCGGCCGCGATTAGCCATGTACAGCTTGTGGTCGACCGAATGAGTACTCCCACGTAGTCATTGATGCACTCTTGAGTTCAGCCGCCCTGCGTATGGACGCTTATGACTATCTAAACCAGCAGGTCCGAATAAATCGGCATAACGATATCCGGTTGGTTATTGAGTCATACCACACGCGGTCTGCAACTGTTCGCCCCGCCATAACTTGCTGGACTTTCGCTCACGTCTTATACTGTGGTCATCCACGCTAGTTGTACTATCGAAAAACGCTTTTCGTGTGCAGTTGGGATGGGCCCAATATCCCTCAGATCGGTGTCCATTTGAGCAGTAGTATCGTTCGTCGCTCCTGCAGAAGCGCATCTCGCCTGAGTGTATAGTACCCTCTGAGGTTCTGAGCAATAAAAGGGCCAACTCGTTTCTTGATGCACCTTCGTAACTCCGTTCCTACACTTCGTCATAGAAAATCCTCGCCATTTTCCTCAGATAATCCTTTTTATGGTACCGGAATCTTTCGGCTATGATAAGCTGGCCCAA
AAGGACACTCTCGTTCCCGACTTCCTCGTAAGGTTGCTGACGGACTGGATTTGACGCGGTCATTATAGATCGTTTCCTGAAGGTACGCGCTCTACTCCATACTTTGGGGGTGGTCGTGGTTGAAGTTAAATGATCCATGTGCCTACGTCTGAACTCATTGTTCTAGGATTCGTAGGGGACCCGCCATGCCTCGATTTATATACCTTTAGCTACCTATGCCATCGAAGGTAGAATCCCATAAACTGGGATCACGCTATTGTGAATTCATTCTCACTTGGATCACCCGATATCGATGGTTCGAAGACTGAATGGCAGGACTGGTGATGTCTGCAGCAGTCATAGTCGCTTAAATTGGCCAGCTCTAGAAAGAGCTGATAAGCCCATGGGCCAGCGACGAGTTTAGTATAAGTCGAAGGTACAGCCTTTAAGCATGAAAAACTTACCAAGGTATGGCTCTGGGCTGGCCCAGCACCACTGTTAGTCGCCAGAGAGGCTCTAATAGGTAATCGCCATCTGGGGATAAGCAACATAGGAC +6 TCCCATGGGCTCAAACAAAGGAGATGCCCCCTATCACTCATGCAGGTTTGCGTTTTAAATCTGCGATGCTCCCTTGGATGGTACGTTGCTAAGAGCACGCGAAAACTTGTCTCTTAGTGCGCGGACATTGTGGGGACTCTCATCAACATCTCTGAAGGGGCCTATGAAGCGCCCTCTTTCGGGACCCTCTCAGCGTATCGAAGTGGCAACCGCTACGGGGTCCCATCCTTGGAAGCCATACGCACGGTCATAATTGATGCGAACAGCCCGGCTTGCACAAGAAGCCCATGATCTCCAGCACCGCACCTAGGTCTAGATAGCACCATTCTTTTACTGAACCGCAGCCACCGACGAATGCCCGTTCCCTATTAACGCACAGGAGTTCGCAGTTGAATCTAAGGCCTGCAGCACGAGCACCGATGCGGCTTGTCGGCCATAGGCGGAGGGGCTGCTGGTCGCAGAAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCAGAGACTCATTGCCCAGGAATAGATTCGCCAGGATGGCAACTAGCGAGACGTAACTTGGTTGCACAGTCCTGGCTAGCAATCAATGTAGCGCCCATTTTGCGACGCAGGAGTACAAATCCCAACAGCGGTACACTCGCGTGTTGAAGGTAACGTACGCCCGATTTCTTGCAGCACATACGGCGCGATTTTCCCCGATTGAAGGTTGTTCTGACCCCCAAGCTTCGGGACGATGAGTTCAACGGACGTTCAAAGATCTTTGCCGCAAAATCAGGCCGTCATGGGTATTACGTGGCCCTCGGTCCGGCTATTTCTCGTATTTAAAGCCATGTCGTATCCTCTTTGGAAGGTGATAAATCCGGGGGGATAGGGCCGTACACGAGACTGAACCCCTAATGCGTACATTGATGATAGATCTGCTGGCTAGCCGATGCTAAAACGGTGGGTCATCAAACTGAGTGCGAGGGTTACTGACACCAGAACCAAGTTCGAAGTTCTGCCCCAAGTTAAACTGTTCTGTCAACTTTGTCTGGTATTCGTGTAACCGCGCATATCTAATCCGTTTTTTTATAAGACGTCAGGAAGACCTTCTCAGGGTGAGCACAGAATTGGACGATCGCCGTGCCGCCCGTTTACTTGTAATCGATCAAGTCTCATTTCGGCACATCATTTCAAAATTTTGCACGAAGCCCCAGACGGTCGTGAGTTAGTATCCATTCAGTCTACAAAGTCGAGAGCACAGGGCTATACCAAAACACGTCTAGTTACGGTAGTTTGTACGAGTCTTTGTTTTATACGCGACACCTGGCGCCTGAGGTTGTATACACGTCGCGCACTCCATCCTGACCTTACCTCACGCTTCTTGCAAAAAGTCGTGCCACCGGTCGCAAATTGATGTTCGAGGGAGTTCTAGTCCCGGCGTGATCATGACGTAACCATACTGCAGCTAAGAAGTCAGAAC
CGCGGGAGAGAGAGAATTGTTATAAGCCGGCGGGGAAGCATTTCGAGTTTTCCGACGGTTCTGTCTGTGAATATGCCTGCTCGGCACGCCGCGTCCTCATTACCGACCCTGTATAATGTCGAAGTTAAGGCAGCCACCCCGAGCGGCGCCGTTACCCTTCTTCTAGGTGAAGTCAAGCTCGACCGACAGTACTCCAAGGGTTCTACGTGAGTTATTTATGTCAGCAGGCTGGCTCTTGAGGGGAGAACAGCAATAGTCGGGTATTAAGTCTCCGGCATTATTTACAGCTCTTAAGACAACTAGGCGGCCTTTCGCGACCCAATGATGGGGTAAGTGTGCTGAGACGTCGCTGATATGCACCTTCCGATAAACCTCGGCCTCTCGTCATTGGCATGACAAAGAAGAATCTAACCAAGTCCGCGCTCAAGTAATTCAAGAATTTCTGTGAATGCCCCTGCTTGGCTAGCTTAACCTTACGCATGCGCTTATACTCGATCTGAAATATGATTTCGATGTGTTGGCGCGGACATGGTGGTGGCGAATTCGCCTCTTTGCATTTTACCCTTGCAATTTGCCAATATTTGCATATCTGCTTCAGCCCTGACTCCAGGGGCCCATTCGTATCGTCCGTCGTGGACGGAAATACGAGTGAACTAATCGGAGTTTGTGAACATGGAACGGTCCGCACCCAGGAGATCCGTCTAGTTCTTTGTCCTATCAGCGTGTTCCCCCTGGGCGACGAAGGCCAGAGTATACTCACGACGCCCATATGGGTCAAATCTTTAGCTACTACAGCAGTATATCTTACAAATCGGTCACTGGCTTCCACGTGTGAGTCGAAGCAACGATACCGATGCAGCTCGCTGTCCAGACCTTTGTGGCAGACTCCAGCGGAAGTCGGCAAAGGTAACCACACCTGATAACTTTGTATGTCTACTGTTCCTTTAAGACTTCTCCCCCCTCGTTTGTAGGACAGCTTGCCGGACAAGGACATTGGATTGCGACGCCGAGGGCCATTATCCCGTGAACGCCTCTCCGCCCAGAGAGCATGGGTGATTAAATCAGGAGGACTTTATCTTCGTCCCAAGTCAGGCCTCTGATGATCACTACTTGAATGCGATAGGAGCTAGAAAAGAGGGTACAATTGAGTTTCCTTACGTAAGATCCTGATTCTTTGGGGATTGTGATACAGTAAGCGTCAGGAGAACGCAAGTGGGCAAGTCGGATTATTGAAGGTGCCACTGTTGTGGATAACGACTGGATCTAGTTCGCTGACTCAGGACCTGATTGTGGTTCAGTTCCACTCAGGTTCAAAGAAAGTGGGCGACTCCAGGTTTAGGAGTCAGGCCGAGTATCAGATGTTTCCGTGTAGGGTGAGACTCGGCTTCATCGTACGCGTCCAGCTCGACAGTTAGCCAGTCACGGGGACATATTGATCAAGCGGTATCGCAGCCGGCCTTAAGAGCGCCGGGTCCGTCAATCACTGAGCGCGCGCCAGGGGGGAAAGGATACATATGAGAAAGGAGGAACTTAGTTAGCACGTTTCACTCCAGATGGCGTTGCTAAGCGACCAGTGGTCTCGATTCATCTTCTTGAGCTACTTACGGGTCTGTCCTACTGTTTTGGCATAGGGGTGAGAACAGAGCAGCCAGGTATGCTTTCGACCTCTGTCTAACGCCGTAGCGGAGCTTACTAGCGTCAAACTGATACCCATCATGCATTATATCGCAAGACAATTCGGTCAAGCTCATGCGTAGACAAATTCCTCTTTGAACTAGGCGGACGCCTACGATGCGTAATCACGACTTAAAAGCGCCCTGCCGATTGAGATAATACAAGCGACACATACAGTACAAAAAGTCGCTTGTGTTGATCGGTTTAGCTATGTCCATGATTAGAGTGACCTGGAACTGCAGAGCACCGCAAGGCTTTCCACTGCTTGAAAAAACGGACAGTGTCGCGTAAATCGTAGTTAGGCATCAGCGAAGATTGTAGCCGTT
CACTTGGACAGCGGAGACCAATAGCGCTAGGAGTGCACCACCTTTGGCAGTCTCTCTCCTGCGGATAAATTTACCAGATTCAGTGTACACCTATTGTTAAGCCATCCGGACTGGTGTTTTGGGACTACTCTACATTGATTAACCAAAGGACAGGATCCTAACTTGTCTTTTATTCATAGTGATCTAATGTCTACCGAGATATTCAATCACCTTGGGTGGGTTATATCAACACTAAGCCAACATGTTGATCACAAACCCGCTGACCCATGGTGCAGAATGGCCCCACAACACTCCCTGGCTACCCGAATAAGTTTTCCATGATTCCTCGGCAATTCTCCAAGATACCGAGCCTGAACAACCGGAGACCCCGCCTAATGACCCTTCTAATATCTCATCCATGTAGTCACTTTAAGAGTAGGTTGCTTTAGGACGGATGCCCTGCTGCGAGTAGTGATAGTGAGTGACACACAGCTAGGTCCTGACGTACCCGCCTAGGCTACGCTCTTAGGGAACGTAGATGATTATAACACAGCACCTTGCGCGGTCGCCCCGCATCGGCTACGTCACTCATACTCTGACCCAGCGTTACAAAATTTGTATTGCTGTTTGCTCGCAAGTCGTTTGTAAAGACTGTACCTTAATCTGCGAGCGCAAATAATGCATCGGCCCAGTCTTCCTCCATTGCGAGACTACAACATAAAACACTGAACTGCCGTTTAAATATTATTGCTAATTCACTTAAATAACGGAAGCAGAAAGCTTCCCCCGAATTAGTTGTTCTTCGGAATTCTAGGCAGCGTAACAGTCCACTAGAGCTCTGGCTTCCCGTAACGCCTGTCTAAGTCCTATTAAGCTTGGACCGGGTCCCACATAAAGCAAGCAGCGGTCCTCGTCACGACATGGTTGGAAACAGCCATTTTATGCACCCATAGTACGTATTGTGATTGGCCACCGTCTCCGATGATCGGAGGTCAAGCCGAGGAAAGGGGAAATTTCCGGGCTGCCCGGTTGAAAGTGTCCGCTCGCATCGTTCGGCTGGCACACTACCACGATCATTGTTGTTGGAGCAGAATAGGGGTCCAAGCGGGTGCACACGCATGTGGCAAGGATTACCAGTATTTAACAAGGTTGGCGGTTTCACGGGTCAACTTTCATATCACCATCCGCGGGATAGCTCTAGGTTGTGCAATGCCTATACGCGGCGGTCCAGGCACTGCCGATCTTAAGAAATGTCAAGCAGTTTCATGGTCATGGTGTCGGAAGGGATTGACGCGCGGGCATCTCCGAGCTGAGACGGCATAAAGCGGCCACAAAATTCGTAGAAAACCTGGAATTATAGCCATGGTGGGCACGATAAAACACAAACAGACGCAGAGCTCTGCATCTTGAGGCCCTGTGATCGTCAACCCTCAACCGTATGACTTATGTGCCGGAGCCTCCTTCAAAAGTATAATATAAGGAATGGCGTGGGGGTTAGTTCGTCTCATGCCCCCCGAAACCGTCGGACCCCGTGTCCTCGTACGGGGTGCCTTCTTGGTTACTACACGATTAGAAGGAAAAGTACCCGACCTTGGACGCCACACATGTGGCACGGGTCACCCACGTAATATTAACGCAGATTGGATCTTACTGACTATTTTGCAGATCAAGGCCTATCTGTGTGATGATCGGGTACACGCACTTCTAATAGGGCGACCGATATGACACACTTCGCGGAAGCTTAGAACCCTTCCCGTTTGTTATCAACGGTAAATCCCGGATCCGACTCGCTACCTCAGAACCGACTCCTTGTTAGGGTTGGCGAGCACGGGGATTGTTCTGTGTAGCACTAATGCGCGTGCAAGGCCCCACTCTCCATGTAAGGTGAACGCCGCGACCGAAGCCGACAAAAGGCCAATGATTGAAAGTCAGACATGCCATAGGCCGCAGTGCTCCTGCGATGTCGAAAACTGCGGTAGACTGGTGGGTGATATAACAGGCCACGGCAAGCGGTCATGATG
AGGCCCTAATGGACATAATGCTGTCTACCGCCTATGTACCCGGGCGACCGTGAGCATGTCATTGAGGCACTTCCTTGTCTTTCACGAGTACCTACAATACTTATGAATACCTGCGCTCATCAGTTTGAATACGTTACCAGCGGCATCAGAACAGGGCATTCTAATAAAGCCGTCTAGTAAAGCTACGCACTTACACCACGTAGTTATAGTGAGTTATCCGTAACAAACACGAGTTATAGCCTTGTGAGGCCGCGCACGTACGAAAGTTTATATCTTTTGTTTGGTACATCCTGGGGGATAAATTGTTATCACTCGAGCTGGTTTATGGTTGAAATACAACCCTGCCCTTTCTCAGCTTATACCCGAGTCCCATGTGCACAAACTATGAAAGCGGAATAATAGATCGTGTTGACTTCGCCGAGTTGTAATGAGACATATGCGACGGTGTCAGATTGAATGCCTCTCCCTGCACGCATTGCATACCTCATTCTTAGGTCAACTTGCATTAGCCCCTGTGCTCCCGATCATACCTTGTGAGTAACACATTATTTCTCTCAAGGTATGACCCAGTCGATGCTGGCCTATCTACGGTTGCCTCTTGTACTTCCTTATTCTTGCTTCCGAATTATAGGATTCGGCATTACACATGTCCTTTTCATGCGCCGACTCGATTGAATCCCTTTACTCTTCAGGGCTCGTTTACTGCGCTGTAGTGGAAGTGACACTGTTCCATTTAATCGTCCATGCGAGAGCTCTATTAAAATAGACGGGTGGTAAGAGGGGTATCGACCAAACAAGTATACAATTTTCCCCTAGGATGTTCATGAGAACGTGGGAGGTACTGCCAAGTCCGCTTCCGTTGCGTGGATACCTCCTCTTCACCATTGTTTGCTTGGTATCCTAACGTTGGATGATACGAGTCTACCAGTCTTTCCTATTTACAGTACAAAGAAGAATTCTGTCTGACGGATCCTCGTGAGACGTCCTGCATCAGGCGAAACGGCTTCACGCATTCGTAAGAGAAAGGCAAAGATCTTGGGCGAATAGTAACGGCCAATTACGCTAATTCTCGCTCTGCAATTGATTACACTTGCTACTAAGCCGCTGTGGGCAGATCTATGACTAGTTGTACCAGACATAGACGTTTTCATGCCGACAATTGGCTGGAGAGAGGTGGTACCCGGGACTACTGGCAAGCTCTTATGTGTATTTTGGTCCAGGGTGAACGACGACGGGAGGACGGTCACCTCCTTTTATCGCTGATCGGCTTGGACTGGATCTGTGTACCCCGATGCCTAGAGTTGTGGGACGCTTGCCGCCCCTTAGGAGATAAGCCTGAACAGTCGCCAACGAGCATCAGTGAGGGCAATCACTCCGTGCGCCAAATGCAATGTGGATCCCCCGTGCCTGGCTCTGTAAGAAGGATATATACCGAACAGCGGAAGTCAAACTGGATTTTACTCGGTTCTAAGGTTCTCCGGTTGAAAATAGTACATTCCTGACAAAAAGTTGAATCTTGCCAGTCAGCGGCGGCCCAAATCACATTTCGACATCCAGACGTTTCCGGTGTCTGCAATGTACTTCCATCGAGTACGGGTTTTCGTCACGAAATTTTTACTTCCAGATTTAACCGCGCACGAACCGATAGTTAACTCAATGAGTGGGTCTGATCCCGCTGGGCGAGGCGGCTTGCACTTCGGCGCTAGAGCATGGAACGCAATGATTCAAAAAATGAGTAACCAGGGGGGAACACTGTCATTACATACACCGGTCCGATCTGCGTTTGCATTAGTAGCATGGGAGTGTATCTGCGGGACTCAATTTTAAGCGCGAACTGGGTTGAATGGTATAGGATATGCCCTATGAAATGGATCAGAAATCACCCGGAGCAAGTTAGGAGTTGTGGAGGTTGCTAAGATCCTTTTCGGCAGCAAGGTGCCCGGAATTCGAGAAACCATGATAATATTAAGGGGCGGTAGGCAGCTTGTGTAGCGGCTTGC
CACCCCTGGTAGATAACAGAGAAGTCGGGTATAGATCATTTATCTTTTGCGTTCCCCATTAAGCATGGAGTCGAGATAAAATCGTCATACGGAACGTTTGAAAAGGCAGAAGCCCTTAGTTATAGTCTGGATCGCAGCGACTCGCTGTGCGATCTCTCTTTTGATAAGTAGTTGGATCTCGAGATATTTCAGCCCGGTGACATGCGCGATATATGAACTAGGTTTCCATGACGTTGCGGCTATCGCGCGCGCGTCGACGTCTAGCAATATCCCCGAGCTTCCTTGCAATTACCTAACCAGTTAGGGTTGACCGTGGGAATATGTCCGCCGCGTTGAGAACTTCGAGATCCGTGAATGGTCAATGATCAAAGAATCGTCGCAGAGGGGAAGGTGTCTAAAGAAGCTACATCACCAAATGCTGAACATAATTCCCAGCCAAGCCAAACGTTGTGTCTCGCAACTGCCGGTGTTGTCATTCCATAACTTGCATGAGGGAAGGGGACCGAATACAGTCTCAGGTGGGAACGGCATCTAGACCACTACAAAAAAGATACCATCGTAAATCAATCTAGCTATTAATAGCTCGAGAACTAATGTAAAATGATCCTTGCCCTTCAAGAATGAAACCAGGAATCTTGCGGGTACACTTTATGTGTATCGATCATCCGCGTTTACCGTGAAGTTATGTAAATTTGTAAATCCGAGGACACGAGCAGAACGACTTTTAGGCGTCCCGGAGTCTGTTCGGAATGAATGAGTACAATACCAAATGACTGAGTAGAATTTCTGGGTGTCGATTAACACTTTCAAGTAGTGGCAAGCGAGTCACAGTGGTCTCCGGTTAAGCACGCCTGATGACCCTTGGGAAGTACGATTCAAGGCCATGGATGAGAGGGGGCCGGAATTGTCCAAGTTAGTTTATTGAAACCCAAGCGCAATCTTGTTGTGTGAATGTATTAACAAGCGTCGGGAAAATCCTTTAACGCAGAGGCGTGTATGATTATCAAGCGCTCGAATCTTTTAGGCGGAGTGTCACCACTAGTATCGCGAAAGGGATTAGCAAATTGTATTCGCTGAGGGCACAGTCGTCATAGGTTTCAACCAAGCCCTCTCAGTTTTCGCGTGGGTTACTTTCATGGACTTCTGCGGGTCCGTGTTGGTGCACTCATGGCGAAGCTCACGGTGCTATTGGTAACCTAATTAGAATGAATTATGCTTGGACGTATGGTTGTCCTCCCCATTTATGTTAAGACGGCGCCCCTGTCAACTGGATATTACTACACCCAGTCTCTATGCCTCTGAGACTAACAAAACGTCTGTGGTACCATATTGTAGACGCTCTCTTCAACAAGACCACACTCACTCACTCGGGGGAGCTGTGGCGAGATTGACCCTTTGCTGCAGTCGTTGCCCGATGTGATCGGTGGCGGTAACCGCGAGATATGGTGTTTACTTATAACTGGGCACTGAACGCGAACTCGGCAAGACAGATGCCCTATACTCGTTGTGTTTAGGGTCGTCGCCAGATGCCCAGATTTAACTCCATTCGCCTGGAACATCGTTGTTGCGCGTCGTCACGAGTTGCAGTGCACGATCTTCGTTTACACCGCGCGCTAGGGGAAGGCGGAGTGTCGATAGTTACGCGAGCTGTATTTACATCTAATCACTCAGACTAAGCCCCCTCAAATCCGCTGGTTGACAACTCAGCTGAGGTCCCATAAAGTCTATCTGGTATCCCATGTAGAGAAAGCAAGCGGACTGCGCCAGGAGGAGCTGTGGCAAATCGACTAGAGTAATGCACGATGTATGAGGCTACGTGCGATACTACTACTACAATTTCCGGTGTTTGGCGCCGGGGGGGGCATGCACGGGTATGCCCGTCTATTCCCTGATAGTCACGCGGGCATCCTAGAGTTTCATATAGTCACCCTAGCCTGCTTTTACCTTAAGCATAGTGCTACATTCTTGTACGAACCTTTGTTCTGGCGCGTTTCTCATAC
CTTGTCTGGTTTAGTTACACCCGGGGATTGGCATTGTATACAATCCCGTCTTCTTCGCGCCTATGTGGATTTAATCGAAGGAGGATTTGTCGCCACGTGCCGCTCCGTCATACGATATGGCAGGCCGGTGCCCACCTAACTAAACTATGGCATAATAATCGCCGACTGGATGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATGAGCTCTGCTGGATACGACGTTTTTCGCTTCGATTGTAAGACGAAGGTATCGAACATATATCCTTCACTTGTAAGGATGGCAGGCCGATCAGTCAGCTTGAATCGTACTATGTCAATTGCAATACATGTAGATGGCCGTCCTCTCGAAACCGACTGTTATCGGAGCTTTTCTTTGTGGTATTGAGTGTGCGGTTGTTCGATGGTTTTCCTTTATTACTGGGCATTATGAGGGTAATAGTGTACATCTACATTATGAAGCAACGGCGACTGGTTGTAGTGTTTCATACACATTCACTCGGCAATTTAGGCTTTTCTCAGCCTGTTGTACCGC +7 TAGACGGTCCACACGTCTGAGATTAACTGACCTCCTAGCATCAACATTTCCCTGAGCGAGGTAAATTCATTCGTAACAGCCTTGATGCGGCGGGTTTGAAGAGCTCGCGGCAACTGACACTGCATTTGACATTCATACACGCTTGGCTCCTATCCTAGACCCCTCGTCACTAAATACCCAAGTCGGGGAACAACTTACAGGAGGTCCGGCTGAAATTTTTGAATATTTACAGAAGTAACTTATTATTTTGACGTCGTACTTGTTAAGAACGTTTAGCATGACATCCTAAGGACTTAGCCTGAATACTAAACTACATCCGGTCCATGTTTTCACCGGCCCACCGAAGAGTCTCACACAAAACGTTTCCTCTACATTCCACTGTTATGTTTTGATTACGTGAATTGCGCGGTGACCGAGAGCGCGGACAGGCGGATCGCATGTTATCAAGCGACCCCGCCTAGCCGGGCTGAACAAAGCAGTTGGGTTGAGAGTTATCCGTAAGTTCTCTTCTTTCGACGGCTATAGAGTACTGGCTGGACTAGCTGATTGTTCATTGGAGGATTAGCCTGATTGACGGAATCTAACGGTACCGTTCTGGCCGGCCAAGCATGCAACTTGGAAGTGGTTAACGGTTCGCTTATTTCGATTCAAGTCTGCCTTAAAGGGCTACGGGGCCTTCCGCCCAAAAACGGCAAATCCGGTGTCATGTAATTACAGACCGATCCTCGAGGGGAAACTCGGAATCCACGTTGGATCGGAAGGCCGCTGCTGCCTACGCACGGACGAATATGCCGCTCGTACACAGTTATAATTATATGCTCAGAGACTCCCTGTCGGGTTCCTAAATCCCAGTGCAATACTGTTCCGAATTCTATTCTGGCGAATGTAAAGCGCATATGCCGTGGGATTGATCCGATCGACTCGTACCGATACGCTATTTGGAACCACTCCAACGCAGCTGCAACTCTTCTGGACAATTGTAACGAACCAAGAGAGGCTCTACGACTTTACACTGGCACGGCAGGCCGGCTGGTGCAATCGTCAGAATATACGCTGGAAGGAAAAGTTCGTGCCGTCCTTAGCGAAGATTGGCCTTTCTTGCACACCAGTGGGGGATTTGATTCTCCTCAAGCAGACGTCCTGACACGGGAACCAAAAACTCCCAGTCGGAAATAACGAGAATAGATAGCATGATCAGAAAGGACCAACAGGAGCGTGACGCTGAGCGCTGTGACCCGAAGTGTGAGACTTTGGTAGAGCGTTCGTAAAGCGCGCATAATGAGAAGTAGCCCTAGGGGGTGTTCAACCGCGCACGGCACCGACGATTCACCTGACCGGCGGATCAGCCTGCCTTCGGTGTTGCGATTGAATATTTACCAGTTAGAAACAATGTTCCATGGCAGACTGAAAGTTAGAAGGCGAACTCGTCCGGGTGGCCTTATGACGTTTGGACGTCA
CATAACAATTTCTCGGCCGGGAGAGGTGAAAGCTTTCTTAGAGTGCAACACTTTGGAGCGGGAACGCGGTTGCCATAGCTAAATAATGAAATAGCTGTACTCCTAGTAGAACTCCAGTATTGTAACTGGACCCGGTGTTTCAGGTGAAGAGACGAGTCAGCAGAGTTAAAGCTTATTTGTATGAGTTAAGCCCGCGAGGGACCAGATGGAGTCCGAGCCTTAGCGTGAACCCACTAACAAATGACCGATCGCTACATAAAGGTGGGGGTGGTTGTCGGACAATTGGAGTTGGTAACACGTCCTACATTCAAGCGGCTCCTAAAACATGAATGGCATATGATATCAGTTGGTGCTGCACATCTGCTCAACTGGAGCCCCATCACTCAATAAGGATCCTTGATGGCGGAAGAGCGTTGTGGTCTACTGGACCCGAGACCTTCTTGCTGGTCTCGACAGTGAAAGCTCACCTTAACTGCCTACTTTAACGAGATAGGACCTTCCACGCCCCGCGTGCGGAAAGCAATCACTCTCGCTAACCCGATCAATAATTTGAGGAATACTAAAACGGCAGGGTGCCGTCGAGCTATCTAAAATGACTAGCCACCGTCTCGGAACTTAATGGCTCTGGAAAGTATTCATGGGATGTATTCGTAGTGATCTTGACGTCGTTACGGCGAGTATTTACCCATTGAGGTCCATTGCAGACACGGACACCGCAGCAGGCCAATGCCGCACGGGTTCTAGAATTGGGTACGCGCATGCAAGCATAATAACGTAACTAGGAGAGGACCATCTAATGCGCTTGATGCTTAAGTATGGGTCGAGATAGAAGCCCGCGTAGGTAGTAGCGCCGGTCGCTTGCTATCATACCCATCACAACGCTTGTATACATTTCCAAACGCCTAAGGGGTCGGAAGGGGGCACCCCCGGGACATACTCCCCACACCCGCTTCTATGAATAAGCTGGAAACGCGCGATTATAAACGAGGGGCGTCCACTATAAGCCATCAATGCTCGCAGACCTTGAGAATTAGCGATCAGCTCAGGGTGCCGTCACATAATACAGATGCTGCTGCATCGCGCAGTCCAGTTCGCTGTTACATCACGAGTACATCGAACCGCAGCGAGCTATGGCCCGCATACTCTTAAGAAGAGGCAATGTGCACCGCGCTCTCCAATAGATAACGTAAGCGAGTACAGGGACATTTTTCTGCCTTGAATGCCACCGCCACTTGAGTACGAGCAGAGTATTGCATCATGCAACCACAGATCGACCTGGGGAAATTACGCCTTTGAGCGTTGTATCTCACGAAATGAACTACACATTAAGGCGTTTCGAGCTAGCGTGCCGACTCCAGATTTTGAGCCCGGGATAAATTAGTGACAGATGCCGAATACAAACGGGCGATCTGTCTTCAAGGCTCTTGCCAAACCACGAGGCTAAATGGGACCGAGAAGTCACCCAACCCGTTCGGCAATTGTGGGTGCTTTACCTCTAACACCGCGACTCGGCGAACCAGTCAACGAAACTACTTAATCTCATATTCGCGCGGCGCTCCCGAGATCTTCTCGACACGTAACAGGAAATATGCGTTATCAACTCTTTTCCAGTGTGGTTAAATGCTCATACCTGGAAACAATGATGCTCAGCGTAATAGAGAAATGTGCACGAGCCGTAGCGCGGCTCCTGAACCATACGGAAGTGTCCAGGAATAACCCAGCCTATCTTCATGAGGGGACTCTGATAGGTACATTACCAGGAACTCTTTGACATCGTAGAAGCCTTTGGACAAGCTGAGCGCGATCCATCACACACTTTGACTGGCTGGGGGGCTAGTCCCTGCGAGTATCCAATCCGGACACACTTGCATGTTTCACATGCGTGTGTTGGGGACTGTGTTTAAGCCTCCTAGCTTCAGACCGTGGATCTCGGGCTGCCTGTTGTTCTGGCTCAGGCTGATACCTGACAATGGCTCAAGTCGCGATTCCCCGCGGCGTCC
CTTCTGCGATTCTGAGGAAAGACCGTCTAAACCTTCCGGTCATGAAGGACCTGAATAGGGGCGCTGATATCCGAACTAATACCCCATTTTGTCCAACTAAAAGTCGGGGCCGAACGAGTTTCGGTCGGTTTTCTGGAAAACCAAGGGTATGGTATTCTCATCAACGGCTCTCAACCCAGATCAGTGCTTCTGGCTGGTCGACGAACATTCCATGGCTAGGGGACTGATCGTCGAGCGTTACAAGAGCTTGATAAGTAGTATATAGCCCAAGACCTTCGGTACATGCATGCCTGGTGATTGCAGGAGTTCTAGTGTTCGTGGTTAATCTGCGATTACGAAGGACCGTTAGAAACAGCGTCGTCATCGTCTCGGACAGTTGGACTTTCCCACTACTATAGCCTTGCTGCCCCGCCACTTGTACGCGCTTACTGATAGTCTTGTGTGACCGTCTGGCTTATTTCGTTCAGTAACTCGAGAAGGACAGTGACTTCTAACACTAACGATCACTTATCCCCCCGACGGTCGGTACTAAAAATATTCACGAGCGTTTGTTGGTGATCTCACAGTACCAAAGCTGGATTGATATAGGCTATGGCTAAGCGGTTGGATCCGTCGGTGACTTGCAACGGGCTCCTCTACGGCAACGGTATCTGATACCTAATAGTGTAGTGCTACACCATAACAGAGCGGTATGCTAAATGTGATTCACTACCGGAGACGTTCCGGCCTTCCAACCACACGTCTGGCGCACGTACGAAATACCTAGCTCTTTCCCCTTTATCATATAGAGCCTCTCTTAGCGGACTGTGACCCGTGTCCAGACAAACATGGAATTCTCGTATTCGCTATCGTCTAGAACTGCGTGACTAAACGTCGCTATAGTTGTACCGATCTGGCTGGACTCAGCAAGAGAATACTCGCGCAAAAGGTGACAGGCTCGCACGAGTAACACTCACGCTTCTATGCGACGAAGTTGTTCGAACCCTCGTGCGGGACTAAGCGCAATATGCGTGCGTGAACGAACTAGGATGCCGTTGGGTTAAGCCCGGTACACTCATGGGAGGTCCTTGAACAACTTATCGGACCAGGGTGCTGAATCAATGCTATCCTCGTTATGATCTTTCCCTCACGCGTGTATCCTCATAAACCGTTCACGTTCACTCGACAATCGCTGGCTTGCGACGTGTTTGTGCGCTATGTTTGGTTACTCGTAAGTCGAATTGATCCGTCAGTCAGTACCAAATCAAAGTCATTCTCAGGTCTTACCCGTTGCAGCGTGTAAATCTACCCGGGCGAAACATGCGGGTCCTATACTTGCGTTATAGTATATTTTTAGCAGTTTCCACCCTCATCGGTAATCTAGGGGGGATCGCATTCCCGCAGAACTCTATCGGATGGTAGGCCCAAGCAATGAGTGAGTCACGCCGCGCATCGCTACATTAAAGAACAGGAATGCGATTAGATCTAGGCCTAAGTATTCTGGCACTTACGAAAGCGGTACAGAGAAAGGTCGCATGCTTGCTGGATCGGTGTGATTAATCGTTACACCTGTCCACATCCTGCTAATCCGACACCCGGTAAGGCTTTCCATATATGAGTAATTGTGTGAAACGTGCGCAAAAGTATGCATCGAAGTTTGGTTGCAAACGTCACTGGAAGGGGCCTGCGCCCAACTCGTCGCAGGGAATAGGGGTGATCGATCGAGCGGCACTGGCCATGCCTTGAGCCCAAGCGGGAAGTTGGATCACAAAGTGAGAGGCTTCCCGATGTAAAGATATGTCCCTGCACAAGAGGGGGGACGGACCGGATAAGCTCGGGTTCCGGTGTGGCGAGACATTGATTTGTGAGTGCATATAAATGGCTACGAATTCGCAACTCGGTGCCGCCGCTCTCGAAACTTAGGACAGAAGATATCGCTTTGAAATGATTAGTTCGCAGCAGAGTGTTGGGTGATGTGTCCCTTGTATGGGTTTTGCGGTCGGGCAACCCGGCGACGTACA
TTGCATTTGTTATTTGGTATTCCCGATCAGTTTAACGTTGTCAAATGTTCCAGAACCGTTATGGGCGTGTATATGGGATAGTACTGTTGGTGTCTCGCTAAACTTCGCACTTATTCGATTCTGGAAAGCCCGCTCAGTCGGACGGAATTGACAGAGACCCAGACAACCCTTGCGGCCAACAGCCTGGCCTGCCTCCTATTAGTGACAAGTAAATGTTGCCAGAACTTGCGGCGTTTTACCGCAAGCGGTTCCTATAAGTACCGCACGTACGTTGGTCCGTCTCGGCTCCAAAGAGACGCTACACAAGGTACCATCACAAATTGTGAAGTCACTAAAATTTCCTGGCCATTGCAAAGCGCGGAACTACCGTTCCCCACAGCAGACGCAATCGGGTCTTTAAGCATTGTTCGTACGCCGTCATATCTCCTCGCGGGCAACTGCGCAAATACGAATCTTTCGCCATGCTTTTATTGGGAGAGTTAATAAATGGGCGTCTGTTTTGCCGTTGAATGGTTTATATTGGCGGGCCCGTCCGGTTCTATGGTCCGTGCTATGAATTACCCGTGATAGATAGTCGGCTGGGGGCAGGAACGACACACCCCCACTAATCCAAAGTTAGTGCATTCTGGCAGGTCAATGAGCTCGATTCTTTATTGTACGCGGTAGGTGGTGCCTCAACCAAATCGCTTGTTACATGCGTGTACCCCTGCCCCTACAAGGATAGTGATTGTTGTAATAATTACCGGTGGGAAGCAGTGGTACACTTCCCAATCTTTACTGAAAGCCTGGTTTACTCTCATTCGTGACTGATCGCTTGGCCCCGATCTATACCGAGGTATTATAAGGTATAGGGGCCGCACATTTATAGCCCCGTGGTAAGAAAATTGTCACTCCTACCCGACACGTTCCTACCACCCTTCACTGTCTTTGCCCGATTTTACAAGGGGTAGCACATACACCCCGCGTCCGTTCTGATCAGACCATTGCCATCCCCCACCGCTCATGTCTTACGTTAACCACTCTGTGCCTGTTATCTTTAATTTATCTGGCGTCAGGTGTAGTCCCAGAGACTAATGATGTTACCCATACTTATGATTCCCAATCATGTGTAATGAATATCGCTGAAATTAGAATCCCGATTCGCAACTAATCCCAGCGAGGCTCACGCGTGGATGAAACTAGCGGTACCGGGCGGCCGGTATTCGGACGACTTAGCAAGGTTATTTGAGAGTTCGGCATGCTCAACCTCACGGTACTGGTTAATGGATGTGTGGGTGGCTTCTTACTATTTAGCGACTGGGAGTATTCATAACTTATCCTCCCGCCAGGCAGTCGGAGGCCGCCCCTGTGAGACCTCCGCCCACCTTTCTGATTTGGAAAGTTACAAACACGTGCTGATAAGCCGATGTTCCCCGATGGCCTATCTTGTTACGGAAATGCCCTCTATCGGATTGCAAATAGGCTTGCGTCAGAGATGTGGAACGTTAATACGAAAAAGCATTCAGTCGGTGAGCGTGTGCGCCTCTGTTACACATGTTGTCTTCCGGTGATCGGAACTGACACGTTCATTAACGACGAGAGACACGAAGATATACGTATACTGTACAGATCGGATGGAATAGTGTTAGGATCAATTACAAAGGGGTCAATGAAGGTGAGGCACGCAACGTTTGATCTTATGTCGGAAATCGTGTTGAGTAGTCATGCTCTTTCTCGGTCATGCCCTAGCACAGACGCTGAGGTTCCAAGGCAGACGTCAGCTCTCACGATCGCGCTTCGCCCTGTGCGTGCTAGTCACCGCTTGCTAAGGCCCCTAGGGCCCTGTGATTCGCTGCGATCCCTAATCCCCACCCGTAACTCAAGGCTTGACGGCGGAACGGTTCCCCTAATGTACGATGGTAATGCGGGGACCCAATAACCATACGCCTCCGATCGTGGACCAACACAATCATTGCCTGAAGCGAGGATAGGATTAGATACCTGGAACGGAGCAATTTTGA
GAGGCGTATGCGGTGGATTGTTTGGGCTTTTGTCTATCACTGACGGAATCATATTGCATCACAATACCTGACGCCTTTCGGGGTTCACTGGTGGGCAAGATCTACCACAAGTCCAGCATTACTCAGCGAGTGCTCCCTAGGCGTCATTCACGAAGCTGTAGGGGTCCCTACCCTCCCTTCTTGAGTTGGGTGGCCGCCTCTGTAGATGGTCTCACCACAGCTATTAATAATGATAAGGCACAATTGTGACCAGCGTTGCGCACTTTGTAAATTAGGGAACAAGATATGGGGTGTGGGCATTATGATTAGAGGGCCTGAGATCGATTCAGTCGCTGCATTTAATCGGATGCATCTTGCTTAGGGCTGTTACTTACTGTATATTGTCACGCGCACCATTCCGAGCCATCAATGGACATCATGCCCTGTAAAAAGGTTGGCCATTTTAGAATGGCCCAAGCCATCTTGGAGACCTTACAGCTCCCCAAGCTTCAAGCAAATGGTTCGGACCATAGAATGTGACCCAAACGAGTATTTTGCAATCCTCTCACGCAATGATCAATTTATATAATCTTTGAGCTCATGGGCGCTCTCAGGTGAAGGGATCGCCTGCTACTGTAGCTTAATGATGCCTAGGTTGAGCACACAGCGTCGTTATTGTGCTATGGACACGAAAAATGCAGATCGGCCAAAGGGAGACATAGTCTTACTTTAATATGCCTACGGTGATCTCCCGGCGAAAAGCATTAATCAGATGGCGATACAGAGGTCTCAAACATGGGGAATCCAATGAAGCTGGTTAATGCTAAGGGGCACATCTATCGATCCCTGGTCATACCGGGATCTTACTAAACGGGGACGAACTTATGATCATCGAATCATAATGACTATTACAGAGGCTCGTTAGTTCATGCGGGCTACTGATGTCTGATATTATTAACCGGACCCAATCAGGCAAGTGCGGTCCTCTGACTCCGTATGGCTCCTTACGCGAGGACTCAAATATACAACGCTGCGTACTGCTGTGTGGAGGCTCAACTTTCTCGGTGTTCAACATCTGCGAGTCGTCGGCACCCTCCAATACAGGGGTAGGGGACTGACTTGCTTGGTAAGTCAAGGCCAACTCGGTTACCAACAGAGAGGTGTTAACTACTGCGTTCGAATCGTTGATATCGAGTGTGACTCTAACACGCGTGATCGTATGCAGATTTCCATAGCCGGTTACATATTCATCCCGTACGAGCGAGTGGACCAGATATTGACATTCGCGGGCGCGCCGTAAGGTCCTCTTGCCGGAGCGAATATGTTTCCGTACAGGAACCGTCCTTGAATGTCAGTCTATTTGGGCGAACGTGAGCGATTTTCTGGCGGATTATCGGATCAAGTATAAGCGAAGGGGTCCCCGTAGCTGTGCCATACGAACGGGTGTGCCTCGAAACTGCAGCCCCCTACACCTTATCGTATGATCAATTAACTCTGGGCAGAGGCTGCTTGACACGGAGAAGACACGCTAAATGGACGTTCAGAAACAGGGACGGCTCAACGCGTCATCGGGAGAGAGGTGGGGTGAAGATGTGTCATACTCCATGCATCTGCGAGGCCTATCGGCGCTAAGCGGGAAGTCGTCGACGCGCTAGACAGATTTCACTTTAATGAAGTTCACCGCTCAGCAATTAGCCGGACAGTGCCCGTCCAGATAAACTTAACTTTGGGAGGAGAAGAAATAACACAACGTAACGTACGTCGCTTCGACTCACGGTCGTTATCTGATAAAAAGGGCGGGATGCGCAGTTTGCTAGATGTTGGTACTCTCTAAGCCTGGTACACCATTTGAAAGACTTACACTTACAATTAACCGGGATTAGGTAGGATACTCGCAGTACGCTTACCATACTCTGGATACGGGGAAGGAATTCGGTTTCTCTGTTTTTCTTACACTTGGGACCTGGCGTATCCATCTGCGCAACGTTTTGGCGAAGTATATCCAGGCACTAGCGTGAGAA
GATATCGGCGCTTAATCCCGGCTGTCGGATATTTTGCAATTGGTTTGGCATCATCGTCCTAGTATCTAGTCAAGTTGTCGTTAAAGCAAGCACGCGAATAAGGAGGCGAATACCAAATGTTCGGATTAATAGAGCCCGATAAACTTCTTCGTCCGTTGGCGGTCCCAGGTTTGCCCGGGAACCAATAAACGTATTTCTAAGACTCTAGTAAGTCCGCTCTCCCATTCTTTCTGAATAGTACTGATCGGGCCTAGGCGGGTCGTAGCGCCTGCTTAAACTCTCCGAGCTCGTCCACGTTCATTTAAACGAATACCCCGTAACACCATTCTGCACATCTAGCACTCGTTGGAAAGTTAGTCCAATCCTTCTACCTGGCTATCCTTCTCATCTTCACTGCTGACACTGCGTGGACCAAATATTTGTTATCCAACTTCTGTGACCGTGATGGCTTCTGTTAGGAGGATAGCATGTTCACGGAGCAATAGATCTTATAGCTCTTCTAGAGTTCGGCTATTCTCCCCGTGAGTCACCAGACAGCTCCCA +8 TTGACTTTCCATATGTCACAGATCAACTGACCACCTAGGGTCAAAGTTTCTCCGGGGGATGTACATGCATTTGTCACACTCTTGACAAAGCGGATTTGATCGCCTTGCTGCAATCGACACTGTAGGTGACATATTAGCAGGCTGGGTCCCTATGCCAGACCCGTCGTAACTAAAAGACCGAATTCGGGAACTTCTCGCCGGAGTCCCTCATGACCCGTTTGAATATTTGCAGAAGCACCCTCTTTGTTTGACGAAGTACATATTAAGAATGTTGTAAGATGCATCCAAAGGACCTAGGCTGAAGACCACACTATATCTAGTACTAGTTCTCACCTGCCCAACGAACAGTGTCATACCTATCGGTTGCTCTAAGTTACACAAACATCTTGTGATTACGGGAATAGAGCTGAGACCGTGAGCAGGGAGAAGCGGATCGCATGGAACCAGGCGACGCCGCCCAGCGGCGCAAAACAAAGCCGTTGGCGTTACAGCTATCCTTAAGTTTTCTCCCTCCTACGGCTCTAGTGTACTGGTTGGATTAGCTGATTCTACCTTGGAGGATTAGGGTGAGTTACGGAATCTAACTAGAGCGTCCCGACGGTCCAATCTTACATCGTGGAAGTAGTTAACGCTGTTCGTATTGCGATGCAAGTCGCCCGCATAGGACTACGGTGCCGTCTTCCCAAAAACGGCAAAACAGGTGTCAAGTATTCACTGACCTAACCTGGAGGGGAAACACGGGATCCTCGCTCGGCCGGGGGCCCACTGCTTCCCACGCACGGAAAAATCTGCCCCTGGTACTCACTTATCATGACAGCCCCAGAGACTCCCAGTTGGGATCCTTAATGCCAGCGCAATACTGTGCCGAATTCCAGTCTCGCGATTGTATGGCGCAAGTCCCTTGGGATTTTTCCAATCCAGTCGTACCGAGGCGCTCCTTGTATCCACTCCATCGCAGATGCAAGTCGTCTGGACGATACTAAAGAACAAAGATAGGCTCTACGAATTTGTATTGGCACCGCCGACCGGATGGTGAGATCGTTAGAAGATACGCTGGAGCGAAAAGTTCGTCACGTCGCTAGCGAAGTGTACCGGAGCTTGGACACCAGTGGTGAATTTGCTCCGCGCGCAGAAGACCATCTGACACGGTAACTAAAAACTACGACCCCGAAATAGCCAGAATAGCTAGCATGAGTAGAATGGACGAACAGGAGGGATTCGCTGAGCGCGGTGACCGAAAGTTCGTGTACTTGGTAATGCGTTCTTAAATCCGGCATAATGAGACGTAGCACTAGTGGGTACTTAACCTTGTACGGCGCCGACGATTCACCTGACGATCGGATCGGCTTGCCTTCTGTATTGCGGATGTAGTTTGTCATATTAGATACAATGTTCTACATCAGCCTGAAAGCCACAAGCGGAATTCATTAGTCTGGCCTGAAGGCGTTTGAAC
TTAACTACACAATTCCTGGGCCCGGAGAGGTGCAAGCGTTCTTACAGAACACTACTTTGGAGCGGAATCGGTGTTGCCATAGCAGAATACGGAAATCTATGGACTTCTAATAGAATTCCAGTATGATGACCGGACCCGGTTCTTCAGGTAATGAGACGTGTCGGCAAAGTGTAAGCGTTTATGTATATGTCAAGCCACCGATGGACCAGAAGGAAAGCGAGCTCTAGGCTGAACTCACTATCAGATGACAGATCGTTAATTAACGAATGAGGTGGTGGTCGGATCATTGGTGTTTGAAACACGTCCTACTTACAAGCAGCTCCTAAAACAGTATCCGTATGCTTTAACAGTTCGTGCCGGAGTGCAGCTGACATGGAGCCCCCTTAAACAGTAAGGATACTTGGTGGACTAAGTGGCTTGAGGGTAACTGGACCCGACACCGTCTTCCTGGTTGCGACAGAGAAAGCAGACCTCAAGCGTCGCCAGTAACGAGATAGGAGATTCCAAGCTCCGAGTGCGGGAAGCAATCACTATCGCTAACTAGAGCAACAATTAGACGTATCCGAATACGGGCGGTTGGAGCCGAGCTATCTAGACTTACGAGCCAAAGTCTCTGAAGTTATTAGCTAGGGTACGTACTCTTGGGTGGCATCCGTAATAATCTTGCAGTCATTACGCCCAGAATTTACCCATGTAGATACCTTGCAAACACTGCGACCGCTGCACAACTCTGCATGAGGAGACCTAGATACGGGTACGCGCATCTTTGGCTAATAAGCTAACTACGAGATTACTTACTAATGCGACCGATGAGTAACGAAGGGTTGAGATTGACGCCCGAGGGTTTAGTAACGCCGGTCGCCAGCTGTCATACTCGTGGCCTCGTTTGCATACATGTCTAAACGCCTAGGGGGTCGCCATATGACACCAGCAGGGCACAGTGTCCTGGCCCGCTTTCGTGAATAAGATGGAATCTCTCGATTCTGACCGAAGGTCGTCAACTATAAGCCATCAATCATTGGGGAGATTTAAAATTACCGATCGGCACAGAGTGGGGTTACTTAGTACCGATGCTGCTGCCTCGGGCAGAGTGCCTCGTTGTTACATCACTCGGACATCGCACCGCAGCCATGTGTGGGCCGCTCTCTCATAAAAAGAGGCCTAGTGCACCCCGCTGCCCAATGGATAACGTACTCGAGTTCCGGGGTATTCGTCTGCGTTGACACCCGCCGCCCCTTGTGTTCGCCCATAGCCTGTCATCATCCTCTCACAGAGGGACCTGAGAACATTTCGCCTTTGAGCATAGCAACTGACGAAATGAACTAATCATTTAGCCGTATCGACCTCGTGTGGCTACTCCAGATTTTGAGCCGTCGATAAATCAGCGAGGTATGCCGGAACCGATCGGGATGTCTATAGTCAATGCTCATGTCTATCTACGAAGATTAAAGGGAATGAGGGGTCAGCCAACACGTTCGTCAATTTAGGGGGCTTTACCTCCAATACCGCGACTCGTAGAACCAGGGACCGATTCTTCGTAATCTCACTCTCGCGCCGCCCTCCGGAGATGCTGTCGGTACATACGGGGTAATATCAGTTATCGACCCTTTTCCTGTGTAGTTGTATGCTCATACATAGAAACGCTTGAGCACATTGTAATAGTCAGATGTGCGCGAGCCCGCGCGCGCCTCAGCCACCATACATAAGTGTCCAGGCACAACCTAGCCTATCTTTATGAGGGGCGTCCTATAGGTACATTCTCAGGCACACTTTTGCATCAAAGCAGCGTTTCGACATAATGTGGGAGATCCATCACACACTTTGGGCGCTTGGGGGGTAACTCCCTCACCGTAGACAAACCGCACACCCTTGCGTGTGGCACGTACGCGTGTTGGGGCTTGTGATTGAGCTTCGTAGCTTCAGACCGAGAACCTCTGGCTTGGTGTTGTTCGTTCTTAGCGTGATAACTGAAAGAGGGTCAGGTCCCGACTATCAGCCGC
GGGCCGGCGCCGATTTCGTGGGAAGTACGGCTAAAGCTTCGAGTGATAACTGAGCCGAAAAGGGCCGCTGGTAGCCCGATTTACACCCGATGTGCTCCGTCTGAAAGTCAGGGCCGCATGAGTTTCGGTCGGTTATCAGGAAAACCAATCCTATGGTATTCGCATCGCCAGATCTTAACGCCACAATGTACTTGAGGCGGGTAGACGAGCATTACATGGCTAGGGGACTCATCATCAAGAGCAACAAGAGCCTGACAAGTAGGCTCTAGCGTTAAACCGTCGATACATGCATGCCTAGTAGTTGAAGTAATACTAGCGTGCTTGGTTAGTCTTCGAGTTCGAAGAACCGTTAGAATCATTGTCATTATTGCGTCGGACAGTTGGAATTTGCGACTTCTATAGCGTTGCTCGCCCGCCACTTGGCCGCGCCGAACGCTAGTAATGTGGGACCGTCTGGCTTATTTCTTATGGTAATTCGAGAAGGACAGTGACCACGAACTCTCTAGATCGCTTATCCCCGCGACTAACAGTACTAAATATATGAACGAGCGTTGATTTACGGTCTCACAGTAGCAAGGGCAGATGGACGTATGATTTGGCTCGTCGGTGGGATCCGTCGTATACTTGCCACGGGCGGCTCTACCGCTACCTTATCTGCTCCCCAATTGTGTAGTAATGCACGATGAAGTGGCGTTATGCTAAACGCGATTCACTACCGGAGCGGTTACGGCCTTCTAACCACACTTCAGGCGCACCTACGAAATACCAAGCTCCTTCCCCTTTATAATACAGGGCCTATTGCAGCGTACTGTGGCCCGTGCCAAAACAAACATCCTTTTCTCCTATTTGCTATCGTCTCGAACAGCCTGAATAAACTTCGTTGTAGTTGTACCGCTGTTGGTTTAGTCGGCTAGAGAATACTCGATCTATCGGTTTCACACGCGCACGAGTCACACTCACGCTTCAATTCGTGGAAGTTGTGACAACCCCCATACAGTACCAAGCTCAATATGTGTGCGTAGACGGGCTTGGATGGCTTTCGATTAAGCAAGGTATCCTGGTGGACAGTGCAAGCTCTACATAGCGGATTAAGGTAATGCATCAATGCACACTTCGTTGTGACCTTTGCCTCAGCGGAGTATCCTGTTATACCTCGCACTTTCACTCGAAAGTTGCTGGGTGGCGACATGTCACATAGCTATATTCGGATACTTGTACGACGAATTCATCCGCCCGTCCGTACCAGATCGACGGAATTCTCAGTATTTACAGATAGCCATGTGTTATTCTAGCCGGCCGAAACATGCAGGTTCCATCCTTGCTTGACAGTATATTTTGAACAGATTCCACGATCATGGGTAATCTAGGGGGTATCGCATTGCCGCCGAACCCTATCGGGTGGGAGGCCGTAGCAATGTGTGAGTCTCGCCTCGCATTGCTTCATGAAAAAACCGGAAAAAGATCAGATATCGGCCTAAGTATTCGACCACCTACAAAAGCTGGGCACAGAACGGTCGGATGCTTGCTGGGTCGGTGTGATTAGCCAACAGATGGATGCTTCTCATGCTAATCCGAGACCCGATGAGGCTGTCGATATAGGACTATTGACGTGGGGGCATGCCAACAGGATGCATCTAGGTTTGGTTGCAAAGGACACGAGCAAGGGCCTGAGGCGGCCGCGTCGCATGCAAGAGGGGTGATGGATCGAACGGACCCGTCGATGCCTCGAGCCCATCAGGGAGGTTGGATCACCAAGTGATAGGCTTGGCGCTATAAAGATATAATACCGCGTAACAGGAGGGACGGACCCCAAAAACTCGACTCCCGGTGTCGCGTCTCATTATTTTCGGAGTGGATTTAACTGGCGACGAATTAGCAGCCAGGTGGCGCGCCTATAGAGCATTAAGACAGGAGATATCGGCTTATAACGGTTAGTTCCGGTCTGCGGGTCGTCTGGTGTGAGCCATATATAAGTTTATCGGTCGATCAACCCGGCGGCG
TACGTAGCATTTGTTGCTTGGTATCCCCTTAATCTTCGACGTAGCGAAATGTCCCATAACCGCTGTGGGGCCTTATTTCGTAGAGTACTATTGGTACCTCTTTACACTTCGGACTAATTGTTTTCTCAAACGGCACCCAAGTCGGACGGATTTAACGTAGACGCAGACACCCACAGCTGCCGTCCTTCTGGCCTGCCTTCTATTAGCGGTAAGCAACCGTTCGCATCACTTGTGGCGTTTTCCGGGATGCGTTACTTATAGGTACCCCACGTACAATGGGCCGTCTTGGGTCCGTAGAGACGCAACACAAGGTACCTTCAGAATTTGTGAAGGCACTAGAATTGCATGGCCTTTGCAGAGCGGACAACCTCCGTTCTCCACAGCAAACGCGGTCGGGTTTTTAAGTGTTCTTCGGATATCGTCATATCTCCCCTCTGGCAACTGGGAGTATACGGCTCTTTCGCAATGCCCTTTTTCTGACATTTAATTAATGAGTATCTGTTTTAGCCTTCAATGGTTTATATTGGCCGCCCCGTCCGGCTCTATGGTCTGCCATATCAATTACCCGTTATAGAAAGTTGGCTAGGCACCGGAACGACACAACCCCGCTTCTACCAAATTAGTGGCGCCTGGGAGGTCACTGAGTCTCGGTCTTTATTGTGGGCCGTAAATGGTGCCTCAACCAAATCCCTTGATACATGTGGGTACCTGGGTCCATTGAGGGATAGTGAATGTTGTATTATTTCACGTTGGGAAGTAGTGGTACGCTTATAAATCTTCACTGGTGGCACGGTATAATATAATACGGGACGGCTCGGTAGTGCCTGCTCTATACGGAGCTTTTAAGAGGAATCGGGGCCAAACTTTTATATCTGCGTGGTAAGTTAATTATCGCTCCTACCCGAGACGATCCTAGCCACCAGCGGTGTCGTTGCCCGATTCTACTAGGGGTAGCATATAAATCCAGCGTCTGTTCGGATTTGACGCATGCAATTCCCCGCCGCACACGTCCTTCCTTAACCTTTCTGTGCCTATTATCCTTAAATCAACTGTCGCTTAGTGTATTCGCAGAAACAAAACAGATTTCCTTACCTTATGATTCTCAATCATAAGTATTGAATATCTCTGGTGTTAGAATGACGATTCGCAGTGAATTAGAGCGGTAGTCCCCCGTGCAGGATACTTTCGTTACCGGGAGTACGATAACCAGACGACTTAGTAGGGTTATTTGAAAATACGGCCGGACCAAGCAATCGGTAAGAATTAAAGGATGTGTGAGAGACTTCTTCCTATTTAGGGAGTGGGAGTCTATATAATTTATCCTCCCGCCTTGCAAACGGCACCCGCGCCTGTCCAACTTCCGCCTTGCTCTGCGACTTGTAAGGATACTAACACGCGCCGTTCAACCTATCCTCCCGTATAGCCGATCGCGTTATGGAAATGACATCCCAGGGATTGCAAATAGGTTTGTCACAGCCCTCTGGTGCGATATTAAATTTGAGCAGTCAGTCAGGGAGCGGGTGCGCCCGTATTACACATGTTATCTTACGGTGATCGAAACTGACACATTCATCAACGATGAAAGATTCGAAGATGTCCGTATGCGCTACTGTTCGGATGAAATAGTGTTAGGATTACATACAAGGTGGTCATGGACGCTGATGCACGAAACTTTCACTCCGATGGCAGGGATGTTGTTGAGTCGTCACGCACGTTCTCAGTGAAGGCTTCGAAAACATCCTGAGGCTCCAATGCTTACGTGAGCTCTCTCGCTCACGCCTCGTCAAGTGCGTGCGAATAACAGCCTGTCTAAGCTCGTAGGGACCTGCGATGCCGGTCGATCCGTAACCGCCACCCGCAATACACAGCTTGACGGCGGAACGGCTACCCCTGGAGACCATGGTAATGCCGGGACACCGTAACCATATGCCTGCGACCCCAGACCGTCGCAGCTATTGACACAAGTTAGTAGAGGATTTGATCCATGGAACGGGCCATTT
TTTAAAGGGGAACGTGGTCGATTGTGAGACCTTATCTCTAGCTCTGGCGTAATCCTACTCCCTAAAATTATGTGACTCTTGTCGGCGTTCGCAGGTGGGCAAGATCGACCTGAAGTCCTCGATTGCTCACCGACGGCACCATACGCGTCAGTCACGCTCCTGTAGCGGCGCGTATCCTCTGTTCTTGAATTAGTTAGACACCTTTGAAACTGGCCTATCCACCCCTATTAATAATGGTAACTCACACTCGTGACAAGCGTCGCGCAGTCAGACCAATAGGGAGCACGAGATGTCGGGTTAGGATTTAGACTAGTATCGTTGAGTTCTAAGCAGACGCAGCATTTAATCGGAGGCACCATGCTTCGCCCGGTTAATGTCTAAATGATGTCACGGGCGTCATACCGAGCCATCACTGGACTTTATGCCTAGTAAAAAGATTGGCCACTTTTAATGACCCCATGCGATTTTGGACGCTTTTCAGCTCCCCAGGCATTTGGCCAGTGGATCTGACAATAGAATGTTACCCAAAAGGGTATTTTGTAATACGCTCACGCAATAAGCAATTCACATCACAGTTGAGCTCATGCTTGCTCTGACTTGAAAGGATCGCCGGATATTGTATCCTAACGATACCTCGGCTGATCACGCTCTTTCTTCTTTCTGCTAGGGACACCAAAATTGCAGATCGGCCTAAGGCAGACAAGGACTTACTGGAATATGCCTACTGTGTTCGCGCGGGGAAAGGGGTCACTCAGATGGCGATAGTGCGGTTTCAAAAATGTAAAATCCAATGCAGTTTGTTAATGCAAAGTATCTCTCCTATTTCTCCCCTGTCATACCCCGATCATACCAACCGGAAACTAAGGTTTGACGCCATAATCAAGATGAGTATGGCTGCGCCCCGAAAGTTCATGGTCGCTAATAATGTCGGATATTATTAGCCGGACCAAATCTTAGAAGTGCGCTCCATTGACTCCGGATTACTCCTTACGAGGGGCCTCCAAAATAAAACGCTGTGCACTGTTGTTTGGAGGCTGACCTTACTGGGTGACCTCCGACTGCAACTCGGCCTCGACCTCCGATAGACGGGGCGAGTAATGTCCTGTTTCGTGACTCAATCAAAAATAGGTTAATGTAAACGTGGTGGCAACTACTGAGTCCGAATCGATAACCTCGCGTGAGAATCAGCCAAGTGTTCTCGTATGCAGTTTTGTAGAGCCTACCACATAATCCTCAAGTATCGGTAAGAGGAACAGATTTTGGCATACGCAGGCGCGCGTTATGGGACTCGTGGTCTAGGGAATGTGCTCCCGTATAGGAACTCTCCTTGAATGCCGGTGCAATTGGGTGAACGAGACCGATTTGGTGGCAGATTTTAGTTTAAAGTTTAAGCAAAGGGTTCCCCGAGGCCGTGTCATAAGAACAGGTGTAGAACGAGTCCCCGGGCCCCCACCCCTTATAGTCAGATCCACTAACTGTGGTCAAAACCTGGTTACCACCCAGCACACACGAAAAGGGGACGTTTAGAAATTGGTTCGGCTCAGCCCGTCAGCGGTAGAGACGCGGGGTGAAGATGTGTCATACTCCATCCATCTGCGAGCCGATTCGGTGTGAAGGGGTGCGTAGACGACGCGCGAACCTAATTGGAGAAGAATAACGTTCACCGCGCACCGATTACGCTGAGACTGTCCGTCGCGATCTACTTAAGTTGCGGAGGAGAATAAATAACATAGAGTAACGTACTTCGCTCGGCTTCAGGGTCGGTAACTGATAAACAGGGCGGGATGCGGTGAGCTCTAAAAGTTGGTCGGTACGAAGCATCGTACACCTTTTGAAAGATTTACACTATCCATAAACTGGGTTTTGGTAGGACACTCGTTGTACGGCTACCCCACTTTGGCTAGAGGGAACGAATTCGGCTCCTCGGTGCTTCTTACGCTACGGATCAGAGGTCTCCATCAGCACGACGTTTTGACGCACTAGATGCGTGCGCTAAGGCG
AGAAGACAGCGACACTTGACCACGGCTACCCGATCTTTTGTAATTGGGATGGCATCATCGTCCTAGTAAAGAGTCGAGTTGACGTGAAACCAAGGCCGCTAATAAGTAGGCGAATACCAAATGATCGGATGAATTCAGTCGGAAAGACTTCTTCGTGTGTTTGCAGTCCGAGGTATGCCCGGGAATAAGAGAACGTATGCCTTAGTCTCCAGTAAGGCTGCTCGCCCAATATTTCTGAACAGGACCGATCGAGCCTGGGCGGGTCGTAACGCCCTTTTTAACTCTGCTAGCGGCTTCTCGTACAAATAAACGGCTCCCGCGTCAGACTATTCCGGACATGATGCACTCCTAGGACAGTAAGTACCTCCCTGCTAAGTGACTATCATTCTCATGTTCACTGCTAACTCTACGTGGGCCAAAGTACTGTTATCCGACTACTGTGACCCTGACGGCCTCTGTTAGTATGATTGCACGCTCAAAGAGCACTCTAACTTAGCGCTCTGCGGGACTCCGGTTACAGTCTCCATAAGCCTACAGGCAGTCAGAA +9 TTGACGTTACACATCTCCAAGACTAACGGATCTCCTCGTCTGAAACGAACTCTGGGTGAGGTAAATCATTCAGAAAAAGGCCTCATACAGCAGGTTTGAAGTGCTCGCTCCAGCAGACACTGTGTTTGAGATAGGAGCAGTCGGGTTGCCTAATCAAGACTCCTCGTAACTAAACCCACAAATTGGGGAGAATCTTTCGGTAGTTCCAGCCGCCATTACTGAATATGTGCAGAAGAAACTTCTGAATTTTCAGAAGTACTCGTTAAAAGCGATTGTTTGTGCATCCTAACGACTTTGCCTGATGACTTCGTCAGATCCATGCCCAGTTTTCACCTGACGACCGGACTGTGTGAAACAATGCGGTCGGTCTACGGTACAAAACTATCTTTAGATTTTGAGTAAAGGGCTGAGACGGAGAGCGGGGAGTGGCGGACCGCATTTTATCAGACAACCCCGACGAGCCGCGCTAACCTAAGCAGATGGTGTTAGAGATGTCAGTAAGTTGTCATGATTCGCCTGCACAAGAGTACTGGTTGAATTAGCGAATTGTACATTGGACGATAGGTATGATTGGCAGAATCTGACGCGAACGGTCTGAGCGGGCAAGTCTCCAACTTGGAAGTGGTTAGCACGTCACTTGGCTCGATGCAAGGCGGGATCCTAGGCGTACGGTGCGATCTGCCCAAAACCGGCAAGACTCGTGTCCTGTAATTAGCGACCTATCCTCGTGGGGAAAGATTGCATGCACGTAAGACTGGCAGCCCCCTCCGGTACCCGTACCGACGGATATTCGCGGGGCGCAACGTTAATAGTACACCCTTAAATACTCGCTGTCGGGTTCACCAATGCTAGTGGAGTACAGTTTTAGCTTCTGGTCTTGCGAAGGTAAAGCGAATGTATCGTGGGGTTGATCCAATCCCCTCGTACCAATACGCTTCTTGTTTCCACTCCTACGCAGAGGCAACTCTTCCGGACGACACTAAAGTATCAAAATAGGCTCTAAGACTTTCTACTGGCACTGCCGACCGTTTGGCGAAGTCGTCAGGGTCTACACCGGGTCGAAAAGTTCGGTACGTGGATAGCGAAGGTACCCCATCCTTGGACACTAGTTGAGGTTTTGCTTCCCGTCTAGCTGACTCCCTGACACGGGAACTCAAAACGACCTCCTCGAAATTGCTAGTATAGATAGTTTGCTCAGAAAAGACGAACAGGCGCACCACTCTGAGCGCTGAGACTGGAAGTTCGAGCCTTTGGTCGTACGTGCTTAAATCGCGCAAAATGAGCCGTAGAACTAGTGAGTATTTCACCCGGCACACGACAGGCGACTCCCCTTAGCGGTGGTTCGGCCTGCCTTCCGTTGTCCGAAGATTCACTCACCTGTGTGATACATTGATCTCAGTCAGGTTGGAAGTAAGAAGGGGAACTTATCATGCTGGCCTGAAGACATTT
GAACTCCACGTAACAAGTACTGGGTCCGCAGAGGTGAATGAGTGCTTAGAGGGCTACTCATTCGGGCGGGATCTATGTTACTAGCGCCGGATACTGATACGTCTGAGCGTCTAAAAGAATGCCAGTATGTTGACCGGGCCCGCTGTTTCAGGTGATGACACGTTGCAGATAAGTGTATGCGTCCAAGTATGAGTCAAGCTAGCGACGGGACAGAAAGAAAGACAGCCTTTGCGTGAACTAAGGTTAAATAGACAGATCCCTAAGTAGGGTTTTAAGTGGTGGGCGGGTAATAGGAGTTGGGAACATGTCCTTCATACAAGAAGCTCCTTAACCAGGGAGGGTCTCCGACCACAGTAGGTGCAGCACATCTGCTGAAATGTAGCACCCTTATGCAGTAAGGTTACTTTGTGGATTTTCTTCCGTCTGGGTTACTGGACCCGACACCGTCTTCCCGTTCTCGACAGCTAAAGCAGACATGAACTGTTCCTTGTAAGGAGATAGTAAGTTCCAAGCCCCCCGTGCGGGAAGGAATAACTGACGCTAACCCGATCAATAATTTGACGAATACAACAAGGTGAGGGTGCCAAAGCAATAACTGCACTTACTATCCACAGTCTCGCACGTTATTGGCTGCTGTCCGTATTCATGGGTTGTGATCGTAGTAACCTTTCCTGCTTTACGTCGAAAATTTACCCATGGAGGGAAATTGCAGACGCAGACACTCCAACATACCCCTCACTGTCGGCTCCTAGAAATGGGTACGCGCGTTCAAGGATAATCAGGTAACTGCTAGATTACCAGTCACTGCGCCTGATGCTTAACCAAGGTTTGAGATTGCCCCCCGGGTCTTTAAAAACGCCGGTCGCCATCTATCACCCTCGCCGCCACGTGAGCATACATTTCTAAACGGCTATGTGGTCAGAAGGTGGCATATGCTGGACACACTGCGCAGGCGAGCTTTCTTCAATACGATGGAAACGCCATATGAAAAACGAAGGCCGTACAGTATAGACCATCAGTCCTTGCTCACATTTAAAATTATAGATCCCCAGAGAAAGCGATAACTAAGCACACATCCTTCCGCTACGCGCAGATCGGATCGCAGTTAGATCGCTCTGACCCCGCAGCGCCGAGAGCCTCGGCCCGTTTGCTCTTAAGAGCCGGCAAAGTGCACACCGTTGCGCCATGGAAAATGGAATGGAATTCAGGGAAAGTTGTCTTCGTTTACAGCCACAGCCACTTGTGCGCGAGAAGGGGAATTGTTCCTTCTATGACACAGGCAACTGAGTACATTGCGCCTCTGAGCATTGTAACTCACGTAGTGCCCTCATCAATAAGCCGAATCTACCTAGTGTTAGTCCTCCGGAATTTGAGCCATGGGTAATTTAGCGCGATATGCCGAAGCCAAGCAGGGTGTGTATAGTCAAGGCTCAGGACTAACCACGAAGACCATTGGGAGTGATGGGTGATCCACGACGTTCTCAACGTTGGGGTGCATAACCTCCTACCCCGCGACGCGGGGTATCAGTGAACGAAAATTCGTAAGCTGACTTACCCTCGCCCGTCCTGTGATCTTCTCGATCCGTATAAGGAAGTCTGAGTTATGGACCCTTTGCTATTGTTGTTGTATGTTCATACATTCCAATTCTAGTGGTCAGCGTACTAGAGTGATATGCGCGTTCCCTAGGGCGGCTCAGATACCATACGGAAGTGTGCAAGAACGACTCAGGCTCTCTTCATGAGAGGCGTCCGACAGGTGAGACCCCAGGAACACTTTTACATGAAAGAAGCTTTAGGACCTACTGTGCGGGATCCATTTCACAGTTTGTGAGGCCGGTGGGCAAGTCCCTGTTAGTATAGAATCCCATCGGTCTTGCATGTGTCACGTACGGCTGGTGGCGACTGTGAGCGCGCCTCGTAGCTTACTACAGAGCACTTCGGGCTTGCTGTTGTTCTATTCCAACCTGACAGCTGACAAATTCTCGGGTGGCGAGTCCTCG
CCGAGGGCCGGCAGCGCTTACGTGGCACGGGCGGGTAAAGCTTCCGGTGATGAGTGACCGGAACAGCGCCGCTGATTTCCGAACTTTTACCCGATCTTGGCCGTCTTAAATTCAGGTCCGGATGAGATTAAGTCGTGTTTCAGAAAAACCTACCCAATGGATATCACATCAACAGAACTTACCCCGACTCTGTAATTGATGCACGTCGACGAAATTTACGTGAGTAGGGAATACATCATCCAAAGTTGCAAGAGCGTGATGAGCCGGGTATAGCCTTCAGCCGGCGGTACATGCGTGTTTTGCGGTTGAAGGACTAGAAGTGTGCGTGGTTTCTCTGGGATTACGAAGGACGCTTCGAATCCTAGTCATTATTGCAGATGACAGTAGGACTCCACCACGTGTATAGCTTTGTTCGACACCAACTGGTACGCGACGTAAGTTAGGCACGTTGAACCGTCTGGGTACGTGCGTTTGATAACTAGGGATGGTCAAGGACGGCTAAGCCTAACGAGCCCTTATCCCCCCGACGATCGATTCCAACTATATGCACGAGCGTATCTTTGCGATCTAGCAGTAACAAGGCTATAAGGAAGTATGATATGGCTGGGCGATTGGATCCGTCTGAGACTTGGAACGGGCTCGCTGACGGCGACCCTATCAGATCCCTAATTGTCCAATGGTTCACGATATCAGGGTGATACGCTATACGCGATTCACTACGCGTAACGTTCCGTCCTCCGAACCGCACGTCAGGCGCACGTAACAAATTCCTAGGTCGTTAGCCTTTATAATGTCGGGCATATTGTCGCGGCCTGTTATGTGCGCCGATACAAAGATGGATTGCTCGGGTGTGCTTTCGTCGCGAACTGGATGCATAGACGTCGCTATAGTTCAGCGGCTTTTGGTCGAGTCGGCGAACGAGTAATCGCTCCAACGGACTTACACGCGTACGCGTCACACTCGCGCGTCAATGTCAGGGAGTCGATATCACCACCGTGCGCCACAATCAGCAATAAGCGTGCGTTCACGAACTAGGTTCGATATGTATTAAGACCTGTACCTTAGTCGATGATCTAAGTACAAATGCGCGGACCAGGGTAACGCATAAATGTACTCATCGTTATGACGTTTGCATGAGGGGTGTATCCTATGACTCCGGGTTATTAAACCCGATAGTCGCGGGGAGGCGGCTCGTTGTCAAGCGATTTTCCGATACTTGTACGTCTAATTGCTTCCTCCGTACGTACCAACACAAAGAAATTCTTGCGCCCTACCCTTGGCCGCCTGTTAATCTTCCCGGACAAAACATGCGCGTCCCATTCGTGCATGACATTATATGTGGAGCAGTTTCCACGAACCTCGATGATCTAGGGGCTATCGCCGTACCTGTGGAAGCCATCGGATGTGAGGCCGTGGCAATGTGTCAGTCACGCCCCTCCTTGCTACATTCTAAGACAGGAACGCGATCAGATATGGGTCTAAGTATTAAAGGAGCTAAGAACACAGTGCACAGAACGGTCTCATGCTGGCTGGATGGTTATGACTACCCCACAGACGTGGGCAAATCCTACAAAGCCGAAACCCAATAAGGTTGTCGATTTAAGAGTATCTTCGTCAAATCTTGGCAATAGGATGCATCTAGGTTTGGTGGCAAACGACTTTGGGCGGGTCCTACCGCTCACGCGTCGCTTGGGATCCGGGTGATCGATCATACGGCCCTGTCCTTGCTCTAAGCGCATCTGGGAAAATGAATCATAAAGGGACAGATATCGAGATATAAAGATATGACACAGCACAAGAGGGGAGAAGGACCCGATAATCACGTCGCCCGGTATTGCGGATCATTACTTTTGCAGTGGATTTAAAGGCCTACGAGTTCACAACCCGGTCGCGTCGCTGTCCAGTATAACGGCTGAAGAGATCGGCTTGAGATTGTTGGCGTCGATCTACAAGTAGGCTTATGTGTCGCGTAGATGGGTTTGGGTGTCGATCTACCGAGT
GGCGTAGCTAGCATTTCCTGCTTGGTATACCCAAACAGCTTAACGTTGTGGAATGTACGATTACCGATTTGGGCCCGCATATCCCATAGTACTATTTGTGTCCCGTTACACTTCGTGTTGATTCTGCTCTGATTCCGGCGCTTAGTTGGAAGGAATTAAGAGTGACCGTCAATCCCCACATGGCGTACCTACACGCCTGCCTCCTTTTTGAGATAAGCAAATGTTCCCTGTACGTACGGCGTTTTCCGGCATGGGTTAAGGATATGTAGCGCACGTGCAGTGGGCTGTCTTGGCGCCAGAAAGACGAAACGCATGGCGGCATCAGACTCTGTGCATCCACAAGTATTTATCGCCCTGTACAAAGCGCACTCCCTCCACTCACCACAGCGGACGCGATCGCATCTTTAAGCGGTCTTTGGTTACCATTATATCTCATCGGTGGGAACTGCGAGAATGCGAAGCTTACACCATGCTCTTATTCTGAGTTTTAATTTATGGGCAGCTGATTTCACGTTCAATCGTATATATTGGGGGTCCCGTCGGGTTCTATGGTCTCTCATCTTAATTGCACGTTATACATAGTCAGCTAGGGACCGGAAGGACACAACCCACGGTTTAAGGACGTAGTGCGGCCCGGGAGGTCAGTGAGTCCGATTCTTTATTGTGTGCTGTAAACGGTGCAACAACCTTATCCCTTGTTAAATGTGCGCACCTGGGTCCCTTGGAGTATAGTGACTGTTCTACTAAGTCCCCACGGGAAGGAGTTGTACACTTAACACTCCTCCGTGGAAGCCTAGTCTCATCGCTTACGTGACTGCTCGGTAGACCACGACGTGTACTGAGGTATTATATGGCCTCGGGGTCGTCCATTTATATGCACGTTTTAAGGCAATTGGGGGTCCAGGACGACTCGCTCCTACCAACGAGTTGTGTCATTCCAAGATTTTGTCAGGGGTAGTACATACTACCCGCGTCCGTTCTGATTAGACCGATGCTATTCCCCGTCACATAGGCCGTACCTAAACCCCTCTGTGCATTTCAACTTCAAATCAAATCTAGTTTGGCGTATTCGCAGAAACAAATGATGATCTCCAACCTTGTGGTTCCCAATCATGGTTAATGCACATACCTGACGTTAGAATGACAATTCGCAATTACTTACAGCGCTGCTCCAACGCGCTGCTAGCATTCGTTACCGGGCGGTCGAAATTCAGACGACTTAGAAGGGTTATTTGGTGGTCCGGCGCGAAGAAAATATCGGTATGGGGTAAGGGATGTGTGGGTGACTACTTACTATAGAAGGATTGAGAATATTAATAACTTATGCTCCCTCCTGGCAAACGACAGCCGCGCCTGTCCCACATCCTGCGCGCTCTTGTTATTATAAAATTAAAAACACGCGTCGGGATACCCAGCTTCCGTTTTCGCCAAACATTGTATGGTCATGAGCTCTCTGGGATTGAAAAAAGGTTTTCCACAGGGATCTGCTCCGTTATTACGATATAGCATTCAGTCCGTGAGTGAGTGATCCCGTGTGACACACCATGTCATCCTGTAAACGAAAACGACACATTCATCCACGACGACCGCCTCGAAGATATGCTCATACGCTCTAGTTAGGGTGTAAAAGAGTGAGGAATACTTAGAAAGGGGTCATGTACGGTGAAGCACGCAACGATCACTCCAATGTCGGGGATGTTGTAAAGTCGGCACGCCCTTTCTCTGTCCAGCCCGAGCAATCAACCTGTGGATCGAAGAAAGCCGTCAGCCATCTTGTTCACGCTTTGTCCAGGGTGTGCTAATCAGAGCCTCAGTAACCTGCTAGGGCCGTACGATTCGCTGCGATACGGACCCGACCGCCACAACTCACAACTTGACGGAGGCACGTCTGCACAGGGGAACGCTGGGAATGACGGGAACCATTGTCCATATGCTGCCGACCGCCCATAATCACAGACATTGCCAAAAGTGAGGATCGGATTAGATCCCTGGAACGTGGC
AATTATGCGACGGTACGGCGGTAGAACGTGTGACGTTCTTTCTTGCGCTATCGGAATCCTATTCCCTAACATTAAATGATTCCTGTCGGAGTTCACAGGTGGTCAAGACCGACCTCGGGACCAACTTAACTTAGCGAGGGCGCGATATGCGTCATTCAAGCAGGTGTAGGGGTCAGGATCCTCCGGACATGTGCTGGCGTGCCGCCCTCTAAGATGCTGTTTCCACAGCTTTTATTTATGGTAGGGCACAATTCTCACATCCGTCCCGCAGTTTGTCAATTTGGGATGCAGTGATGGCGTGTGGGCATGATTAGTAGAACCCTTAAGTGTGATGCAGTCGCGGCAGTTAATGCGAGAGTCCATGCCTAGCCACGCTCCTCACTATATTGTGTCTCGCGCGCCATACGTGGTCATCAATCAACTTCATTCCTTGGAATAAGGTGTGCCATTTTTAAATGCCCCTGGCCTTTGTGGACACCTGTCAGCTCCCCTGGCATCAAGCAAATGGTTCTGTCAATATAATGGATCCCCACAGTGTATGTTACTATAGTGACACGCACTAAGCAATTCACCTCACCGTTGAGCTTATGCGCCCACTCAATCGCAAGGCTCGACTCCGATTGCATCTCAACGATACTTAGTCTGAGCTCCCAGGTTCGGTATTTTGCTAGGGATAACAAAAGTGCAGATCGCACTAGGGGAGACATGGTCACACTGGAATCAGGGTACCTTATTCTCCCGGGGAAATGCGCCGATAAGATGGCGATCCTGCTGTCTCAGACATGGCGAACCCGTTGCAGCAGATTAATGCTAAGTATCACATCGATGAATGCCCCCTCTTCTACCGGTCATCCCAAAGCGAGGCGAACTGTTGCTTACCCCGGCAATCTCAATAAAGCTGGCCCCCGATATTTCATTGTCCCTAATTATGACGTGTATTATTTACCGGACCCAATGAGTCAAGTCGGGTCCGCGGACTCCGGAATACTTCTTACCAGAGACGGTAAATATACAAAGCTGCGTACTGTTAGTTGGAGTCTGAAACTCCTGGGTATCCTATCCCTGAAACTCCTGCTCAATCCCCAACAGACGTGTCGACTAATGTCTTGTGTTCTACGTCAAACACAAATAGGTTACCAAAAAGCTGATCTTAACTACGGCCTTAGAATTGATGATACCAAGCGTAATTCAACGATGGGGTCGCTTATACTGCTTTGTATACCCTAGTTGAGATGTCTATCTTAAGAATGCGTTGAACCTAAATTGGCATCCTCAGGCGCGCGGTAAGGGTGTCGTGTACGCGCGCATATCTTGCCGTATAGTAACCCTATCTGAATAACCGTACAATTGCGCGATCGCCAGCTATTTTGTAGCAGACTTTCTTAGAGCGGTATAGCGAAGCCGTACGCCGCGATGTTTCACAAGAACAGGTATGGATCGAGTCCCCGGCCCCCGACCCCCCGTCATATAATGCGTTTGCTGTGGACATTACTTGATTACCACCCTGTACACGGGAAAAGTGGATGCTAAGAACTTGGTACGACTCAAAGCGTCATATCTAGGTATGCGGCGAGAAGACGATTCGTACTCCCTGCAACTGCGGGGCGAATGGGCGGTATGGGGTGAGTCGTCGACGCGCTAACCATAATGGAGTTTAGTGACAGGCACCGTCCCGCAAATCCGCTGACAGTGTTCGCCACGATAAACATAAGTCGCGGGGGCGAAGAAATAAATCAAATTAACATAGTTCGCTCCGATTCTCGGTGGTCATCCGATAAAATGGGCGGCCCAAAAAAAGGTATGGAAGTTTCTCCGCGCTACTTATCGGACAGTATTTGACAGATTGACACTTCCCAGCTTCTCGGGTTTGGTAGGATACTCGTCGTACGCATACCCAACTGTGCATCCCGGGTATGAATATAGGTCAGCTATGTCACTTAGTATTCGGACCACGCGTCGCCCTCGTCACAAAGTTTTGACGTAGCATATCGGGGCGCTAC
GGCGAGCAGACTTCGGCGCTTAACCACGGCAGCCTGATGTTTTGTACGGGCGATTGCACAGTCGTCCTAATAGCGCGTCAAGTCGACGCTAAAACAAGTACACTAACAGGTACGTGAAGACGGAATGTTCTGATGAATTAAGCCTGATAAACATTATCGTTTCTTTCGAGTCCCAGTTTTCCCCTTGAAGCAGAGAACGTGTATGTGAGTCTCTAGTAAGTCCCCTCGTCCAATATTTCGGAGCAGTTGCGGTCGCGCGTAGGCGGGTCGTAGCACCATCTTTAACCCACTAAGCCCGTCATCAAACAATTAAGCGAAGACCCCGTCAGACCCTTACGGTTATGAAACACTCCTTGGACATCAAGTACTTGCCAGCTAGCCGACTATCCTTCACATCTTCACTGTTTACTCTTGTTGGACAAAATATGAGTTAAGCAACTAATGTAACGCGATCGGCTCACGTAACAATGCTAGCAGGTTCACAGCGTAATAGAACTTACCGCTCTGCGAGTGTCCAACGGGAGTCTCCGTTAGCCTACAGGCCAGTACCA +10 ATTGCTCGAACCGCGCGAGTGACTCTTACGTCAGACTTCCTAATCCATACTAGCCTATTGAAAATTGCAGTGCGTTGAGCAAAAGGTCTCTACGAGGCCGTGTGTCCTATAGGGTAGGTTAAGCTAGACCTTCCGTCGCCGTTATCCTACATGATGTTCTTTAGAGAGGGATCCGGGTTTACATGGACATACTCAATATGCTATCGCGGCGTCTCAGGGCCGAGTTTTGCGAGAGAATAACACGTATCGCAGAGAAACGAAGTACTATTTTGCTGAGGAACGGCCACTGTGTGTTGTTGTCAACAAGGAGATTACGAACGGCATAGGCCAGTATATGTCAGCCAGTTGATAAATTGTCCTCTGCTGGCTGTATTGTAGCAGAGCACTAAAGAGCACGATCACATATGCCTGGGTCGGACCTTGGGCCATTTATGCGGGATTTCAGAACAATAGGCACGTCTACACGATGTCGGAGGTTTAAAGGGATACTTTAGGTCGCAGGGTCTTCCCGACTTAGATATTCCCGGATAGGCAGGCGTTACGCAATGCGTATATATGGGGTTATACACCTGTGGGCTGCATGCAGCTAACTAACCTAACCGTATTCAATAAGAGCATACGAAGTTGACGCCACAGCGGGCCGGAATTACGGCTAACTTTACGACTGTAACGCGATCGTCTCTGCGCGGGACCTCTGGCTTCCGCCAAGTGAAACGCTCCCCAGCCAATTTATCTCCCAGCCCTGACAGCCAGGCTCTTGTAGCCGGTCTACGCGTTCTTCCTCGAGGAACAGGCAGGAATGTACAAGCAAATTACACTTATATCATGCGGTTCGTCAAGCATGTGATCCGGACAGATTTGAACAAGAGCCGACTCTTACTTCCCTTTCAAACCGAGACTATGACTTCCAGATGAATGCCTGGTGGCCATAGTATTGGTCTCACTAGTCGTGGAAGGAGGGACACGTTGTGGAAGACCTTTTGGTAAGTAACACTATCATCGCATAGCAACCACCTCACTTACCTAAGTGGCATGCTGAGGCCAGCAGTGCCCCTAGCCTAAGTATCATTCGAGCGTCACTTTAACCCGAAATACACGACTACCCTAACTACTTTACTAGCTATATAATTGGTAGTCTTGGACTGTTATAGTGGGCACGAGAGGCCTAAATCAGGTACTGATTAAGAAGCAAGGTGCCCGCGCGTGCAATTGCGGTCCATAGGAAACGTCGGTTCGCCCGACCGGAAGTACCCGACGTCTTGGATAATTCTAGTCTGACCAATACGATAATTAGCCTCAGTGCCCTCTGGGACTTGAACGAGCTGCCGCCTTCACGGCCGGCGAGCCTTGGCTCCCAGGTAAATCATGACATCCTTGGGGTATTGCAGGACGGCTTCGCCCATGGCCCGGCTCGACCGGACCCCTTAACACTGGGACGACTTGA
CAGTGGCCGTCGAAACAACTACCTCATTGGGGATGATGACTCACGATGGTCTTCCAGACCCAGGTCTAGGAAAGGCTCTAGGTCAGAGAGATCCAGCCATTCTCTTCAAGAACCACTTACCCAACAGGCTTTCGAAGCACTACCGCTTTTCGGCCGATTGGGGCCACATGTGTGTCTTACGGACGCACGAAACCAGGGGGTATACCATTCTCGGGGCGATATGGTCGGACGGTTTGCGCGGCTTTACGGACCATAGAGCGCGCTTGCACGTACCTATTTAGGAATTCCGTGCAAGCTTAAAGAAGGTCGGTTTGTAAGCAGATAGCAGTAGATCGTGCTTCGTGAGACTTTTTGACAAAGTCATAGGCATTGGGCTCTGCCAAAACCTAGTGGCACTCTTGGCCGAACCAGGGGCCGCGATTCTCAGGGCATCGCTTCGATGAATGATGGGATTTGCTATACCGGCAACCACGTAAAGTGTGTAGGTCACAGGAAGAGCCAACAGTGAGCCGGCGTCTAAGTTTAGACTGGTAAAGGACGCCAGATCCTAGTGCTGCAACGCGATTCTTGCGTGGCTCTGTTAGTGCCTCGAAAGGCGTAACTGACATTGTCGATCACGGGTCGCGCCGGTGTGACGATCTTAGCCGACTTCTAAGAACGCAGGTCTGCCCATACAGACTGTCAAATGTTCTAGAGTGTACGGGAATTAGGCACTAGGCTAAACTTTAAGCAACGACCCGTGGGTGAGCATCGGCCTGGAGAACCCCCGTCAGCAAGGGCGTCGGAGATATGGGATGAAGGCTATGCAACACTCCAGGATAGCAGCGTACGCTCCGTGTTTGAAAGTATTGCAAGTAGCACAAAGACCTCATACAGCTTGTGACGCATCGTACGGGGTGGCCTGCGCGGGGGAGTGCGATTTACCACGTCATTGTCTGCCCAGGTCAGTAGTCCCCTTAATTTTACCGGGGGGACGCACAACAATTGCCGCATAACGAAACATCTCGGCATGAGTAGGGATAAGCCTCAACCTAGTGTAAGCACAGCGCCCCGAGCCTCGAGGAAGTGATTTTCTTTGTTTGGCACATATAACAGTACTAGCAGGCTATGCGTGTACATTTGTCCATCCTGTCCGTCCAAGGAGATGAATCCATTCACTCTTTTTCCGTATTGCCCCCACAACTGTTCTTAGATTCAAATGGAGCTTAGCGTAAGATAGTCGAACATGTATGTCCAAAGTCTTGGGAGGCCTGCGCAGCGTTGTGACAATACCGATGGCCACTAAGGTGTTACACATTACTGGCTATAAGGATATAAATCCGGTCGACATAAGACTTACGTTCAGTTGTCTCTTTATGGTGGGCCGTCGTTGGCGCCATTGAATCTTGAATTACCATTGTGACTTATCGGGACTCGTATGGGTATGGATTCGCCAGAGCCCGTATCGCGTGGTATAGGATAGTCAAGAGGCAAAATTTTTTGCACTTGTGTTAGCGTTCATCTGTGCGTCCCGGAATCTATGGAATTGACTACCGAGTAAAGTGGAGTAACGCCGGGAAAAGGCCGTAGAAATCCCACGCTAGGCCAAAACTAACCCGGAACAAACGGCATCCTTCATATTAGCAACTTGTCTAGTATAATGGAGGCTTTCAGATAAAGCCAATCTTCTGAGTATCGTGACCCCCCGATAGGCTGGTCGTGTCAGGGTCTAGGCGCCGTCACATTGCCGCTGTGAGGCTCGCCACTCGGTTCAATTTTAAAATGTGCTAAGCCTCTAACGACCGGGGCCACGAGCTTTGCTAGTTCACCTATGGGTGTTTAGGCGACGCCAGCCGCGGATTTCTGACAATAAAGGTTATCAACTTAACAGGAGGTGACGATCAGATGCACTCGCAGGTCCTGTCTGAGGTGAGAGGTCTTACGTACATAATGAGTCGTCTAAACGACGAGGTAGCATGGCAATTAAGCATTAGCGGAGGCGCGGTGGGCTTGCTACAAAA
CACGAGTTCGGGGGCGAGGCGTACCTGTTCTAAATCCGCAGTGAACTATGACATGTCACGCTGCCCTGGTCAAGTGTACCGAGTGACTTAGCCTCGCTTCGTACCTTTTAGTGTTTCATCAACTTCACAGGGCCTGATCCGTGACTTACCCCAAAGGGCCCTGGCCTTTATCCACATTGCGAGACATGCCGTTGCGGCCAACTAATATCTACCTTTATTAAATTGGGGGGCTGTTCCTTAAAGAAGATTCGAGTTGCCTCAGACTCCTCGGACGTGCTCGAACGTTCAAGTCATGGTGGAACCCGCCCTGGAAGACAGGTCCAAAATCATTTGCTGGTACAATTATGAGCGGTCAATGTGCTAGAATTAACGGGTTAACCGTTGTTCTGTACTGTGAATTGCATAGCGATCTGGGTAGCTAGTATTTCTGCACCCCCATCCAATCGCGGTGTGAAAGATCCACGGATGATACTCTCGCGGCCTCGCTACCCCTCAGAAATGAGCCCCGCTTAGGGTAATGAACCAACAATATTCACTGGGGAAGCTCATGGGGACAGGGGTACAAGGCACCTGGCTGAGCAATTACCCAGGATTGAAGCGCGGTCCTAGTTGGAGGAAGACCGCACCCGGGTAGCAGTCAGTCACCCTATCCTCCGGAAGAGGATGGGGCGTAACCTTATCTCTACAGTTCGCCGCAATCACTTGTCAAGCAGGTAGCCATTTAGTACCATCGTGCCATCATCGTGATCTGCGTCTGGGGCTGCGCGTGCTGCCCGTGGTCATAAGAAGTGAGTCCTTCCTCTTGAACTTCCGCGGATGTGCCCCAGAGGTAGGAGACACATACAACATTGCTCTCGACACAATGTGATTGTTAAACGACCACTGAGACCCCATTTCTGTGTTTGCCGTTTACAACCCAGTCAATTTTAATACCTTAGACGGCTTGTATTATTCCGACAGCATAGTGTGTTATGCGTATGCTTATACAGAAGACGCAACGTGGTGGAGGTTACACTATAGCCAGCCCCCAGCATGGGGGGCCGTTATATGGCTTCAGTTAACAACAAAGTTACTGTGCCTCCAGGGGACTAGTTCTCCGACTTGAGATATGATACACCTACCCCAACCATGAATAACACACTCATAGGCCAGAAGGCTGCTTACCGGCAGATAGTGGTAAATTGATCGATGATGAGTGTGGGATTATTAACTGACAATTGCTAGGCGCCGCTGCTTCAAGGATCCGCGAGGTTGCACGTCCGTAGGGTTGCTAGATGGAGCCGTGATTACAAGGGAAAATGTGACCGCGCCCATCCTAGACCAGGATCTGGAGGGTAACCGACCTACCCAGAAGACGTATGACCTCACATCCCCTTGATCTGTGCACAACTTGGTTGCAGGTATGTTCATCTCCGGGGATTATTGCCATCAGGCTACGCGGTATTTCCGAAGACGCGTTGAAGATACAGTGCATGACATGCCACCAAGCCTTTGGAGGCATCAGTGGCCATCGATTCTATGGTTTAACTGACGCGGTCAATGCAGACATTAGTGAAGGCGTTCTGGCTGGCCGTTAATATCGGTTCGGGGATTGTCGTCCTCGTTTTGCGGGTTTGACTCCAAATGGTATAGAAATCGGTCACGGCAGTTTCTAAAAATTACTCGGAGCATTTTTGTTTGCTGTGAACGCGGGCCTTGGGGGATGGCCGAGTCGTAAGGGACACACCCCGAATACCTAAGTGAAAGCGTGTGTCCCTATTAATGGACTAGCGCATGGGTCGACTCCCAACCGAAGTAGGACTCCAAAAACGCGATATTAGGCGGTCTCGTCAGGACTTCATGGCGAGATGCTGAGCCGTATCCTATTTTGTACTCTTTGCGGATTAAGGAAAGACCCCGTTTCATAGATAACTAGTCGGATCAAAAGAGATTCTTTGCGTCAGGCGTAAATGAAGCATAATCCAATATTTTGCTCTTCTTGCCATTCCTGCCTTCTCGAT
CGCATCCCTGTCCAATGAATCTCCCCACTCCATTGCCGACTTTCGGTCACGGTCTCCCGCCGTAAGTCGTTAGAACACATTGTGAGCAGTAACGTAGTCTTGTCCGTGAGACGTAAAATACTGATAATACGTTGGTTGGGAGGACTGTTCGGGTCCAGCTACAAGGGCCTTCAGCCCGTTTTCGATGGCTTCCGATGTATGTTACAGTGACCGACATATCCTCGATTGTATTGCGGGACACGTACATCAAACAGTAGACTTTTAACATAAAGACACAAAGCTATAACCGTGTTTTGACGGTACATGGACACTCACCAGCGATCTTCGAGGCGTAGCATGGCATTCATGGAGGACGCTCTACATGTATGATTGGGCGCGTCCAAAAGTGGATCGCATAAAATCTCCGGGTCGAGGGTTTGGGGGGTACTCTCTATAACAAGGAGATTTAAAATACGCGTGACCATACTGCAGTACGGCCCAAAAGGAAATGCGTTCCCGGGTACAGCTTTTTGTTGCGCCGGTTTGATGATATTGTGGCAGATAATCCACCTCGGGGGTAGGCTGTTCTTGGATGCTGTATGAGATGTCATGGCCGCATACCACCGGAGCAACGTGGCGAGTACTCGACCTAGGCCTTCCCACCTGAGAGCATGAGCTGCTGCGATATTCGAGTTGTCTCAGTGATTTACTTATCTGATTCGCGGCCCCACTGACGGCCAGACCACCTCCGCCCCCTGGAATAAAACGCGTCCGGGAGAGACTCCACTCATGCAATGAATTGGTTTCAAAGTGCAGCCCAGTGCGTCCATTGGGACGTTGGTTGCATGCACCGGAATGATGAATACCCCGGTAAAGGCGTGGCGTTCTATGTCTTAGTTTGCTCCGGAGATCTGCAACATTAGTTCCTACCTACATACAGACTATTGCATCTGTAGCAGGAGAACACGGGTCTTCGCGGACCATTTTTGTCAATGTTCAACAACTTAGAACTAAAGTTTGGGCCTGCCATATCCGCCCCTAGTTGCGTGGTTTTATGTCTATACGGATCCCAACCCGTGCTTAAATAGGTTATATGAAGGCTATCGCTTTAATAACGTGTTAGGACGATCCCGCCTCGCAGTTCGAAATGCCTGCTCTCCCATTCAATAATTGCCTTAAGAAACGTGGTTGCACTGGTCTCATCAGACTACTGGACTGAGTACTGATGCAGACCGGCTACGTTTAGACGGTCTCGCCATGTACGCAGTGGCGTGATTGAATACCCCCCCGACGATATCTGATCGCACCCGAACACCCAAGTGAAGCAAAGCACACGGGTCAGTACCATCCTGCAACTCCGTAAATAAGGGTTATCGGGGCCGCGTACGGCCCCCATGATTTATTCTCGATTAGTTGAAAGTTTCTCTTATGTCCGGGCCTCGAGTGTCCTCAGTACAGCAAAGGCACTTATCGGCTTTCTCCTTCAGTTCAAGTCAAGCTTTATCACTCAGGATCCGTGTTTCATATGTCATGTGGACAGTTCCAAACTGTGCGGCCTATGTCGGAGGTCAGGCGGTTTATACCGTGATATATCCTATCTCCGCTTGTATCAGCGTGCTTATTGACCTCAGTTTCGTACGAATCAAAGTTTCACGGTAATTACCTTCCGAATTTGGGCCAGATGATTCTTTGCTCCCACGACTGCATTGTTCCCGTGGACTTCTGAGATGTGACGTTCCAATCTTCAACGATACGGGGGGATATAAATGGCTAGTTCCTCAATAAGCTACCATTCAGGAATGTGTGTGTTAACGGTTCCCATCGGGATCCAGTCGAAGCGACCCCTCGTGGTGGATTGACATCGAACTGTAGCCTACAATGCAACCTGCTCGGCAGGGCCCGGGGTGCCAGCAATACGAGAACGTCTCCGGAAGTGGTCCGGGTACGTTTGCACCGGATAATCTTGATTCGCCGCAGCTGGCATATCGCAAGCTATCTGGCCCCGAGAGAGCCAAGCACCT
ATGTCGGTCATGAGTAGGCTGGACCTAGGCTAGTGTACTTGGGCAGTTGCACCGGTATTGCGGCTGGCTCGACATAACGCCGACTCCGCGATATCTGGTCAATCTAGTCATAGCGCCATTGTTCCTCGAGCTATGGCACTGCAATAGCAAAGTCACATCATGACGCTTCCGGCTGTTTTAGTTTCAGGTGTTATGTGGGGCTTCAAGTTGCGTGCGAGATAGATGAGGGGGTACCAGAAGCAAGTCAAACTACGGCAGGCGTGATGCTCAGAATTTGAGTGTGCAGTAGAGTAATATGGCCCGCACCGTTTCCTCCATGAGGTTCGTTATGGCTTACACTGTGTAAATTATAAACGAGCACCGGTTTTTGCTTGGCGGTTCTGCAATCTAAACCCTGCCTTGGAAAGTGGTTTAGTAGGGATGTGATTTCACGACTTCATAGTAGGTTACTCTCAGGTCTTACAACACTTGGCGGGCGGCTACAAGAACCTGTACCACCGTGATGGAGAGCCGCACCCATGTATTACACATTAGCAGGCCACTTTATGGCGAGTAGATGGTTCATACGTAGGGCGGACTGTTTCCGCCGTCTGCTCAGGAACCGGACATTGATTTTTACGTCTCCCCGCACAGGGGCGGGCAACTTAAGGGTTGCTAAGAAGAAAACACACGGGCGCAACGGGCAATACGTCCACTAGACGGCGATCGCGACTCGGACATGAGGAACCCGGGTAAGCCCATCTCAGTTAACTATAGGGTTGGAGCCTGAGGGGGTTAAGCGCATTCGGGTTCGTTTTCGTGGCAGTACTCCCGCAATACAACCTTTGGCAGCACAAGTCACGTTAAAAAAGACGGTCGAGGACCCAAGTAGTTAAAGTTTGGCGCCCTCTCGCCCTCTACAATGCTGCCGTTGGTAGCGTATTGCCCCACTTGTATGACACGCCATCTGCGCGTGTGGATGTCGGAGAGCTGAGAGCGCCTTTCTTGGGATGCGTACTAGAGCAGATGGAGCTAAAGCGCATGAAGTATACTAGTGTTCAGCGCACTCATCGCACCCGCCAAGAACTATCTCGTTATGGCACTGTACATAGGTGTACAGCTTTCCGTACAGAAGGGACAGGTGTAGACTTGGGTACGACGTCCTGCTGCTGGAGGACATACCCCCAGCGGGCCATCGGCTCTGTTAAAGCGAGCCAGTCACATCCAGAGGTCGATCCCATGAGCGTCTCCTGCGCTAGGTCCTCACTCTTAGAGACCTTCGGCAAGTAACTGAATCTAGCGGCAAGGCCCGTAGCTTTAGCGGACGAAAATGCAAAACCTGTCAATTATTACAATGACATGCCATAGACAATACTCGTGTCCCGAAGAGTACATCGGCGATGCGCCGAAGTTAAGCAAACCCCGCTGCTTAATCGGTTATCTCATACCAAGCCAAGATAAAGTGACGACCGATTTATAGCAACCAATCAGCAACCACGATACAACGAGAGGCAGTTGGAAAGCAGGCACCGCGTGTAAATTTTCGAATGCCCTGTAACACGAAAGGGTGTTCTCCTACATTCCTCCTGCCCCTCGGCGACAAACTTATCCCGACTATGCCAAGTCACACATCCAAAGGAGTTGTAACACAGTTATCCAGTCTGTTAGAGAGTCACCCCAGACATAGAAAGGCTCCGTCCGAAAATAAATGACGGGGAGTTATATCAATCACATCCTACCCGGGACTGGGTCTCGTCAGAGGTACACTCACGATTCACCCTTAGAATCGAACACTTAGGGGCCCAATACCGTACAGCTCGGTGATGTCTGGGTCAACATTACTCGTCGTCGCTATTTTTGTCCCGTTTCTCCCCTAAGGGTAGATAGCTTTATGGGGGCCTGAGCGATACCATGGACTGTGGACTCAGAGTTTCTATATCGCCATGCAAGACTCACGCTAGGTCATGCAATGCGCCCGCGAATAACATGCGAAGGTCCCTATTATAGTACCAGCACCTATC
TGCAATGAGATGAATTCCGGAAGCGACAAACAGGTGCCCGGCTGCCGCGAAGGATTACGATGTCTCTAGTTTCCCGTCGGCCGTATTACACAGCTACCCTAACATTTCCCCTCCAGCCCAAGTTCGGGAGCTGCACCTTTTTTGACTTCATTCTCGCGTGAGCGAACATTCGAAGCAATTGCTATAAATTTTATATGGGTTCTGCGTTGTATCAAAGACTTTTACTTTTGAGTCTCTATATATCGTTAGTGCTCACCCAGAAAATACGGTAGCGTTATGTAGGAGTCAAGGGCAGTGGACTCGACCGATGTCCTAGGGTTTTCGACACAAGTACACGAATGGTGGGAGGACAACCAGTTATAGTGTGCTAAATTTTACTTCGCGAGTAAGAGTCGAATACACAGGCGGGCAAGCTCTAGGAAAGAAGTATTTGATGTAAAAACAGTTGAGTTTGCTGGACAGTACACGGTGGGGATCGACGCTCCCGCATACTCGCTTGGCACCCCTTCAGTATCTTCATCACTAAATCGGTGTCCGGGAATTAACAAAATAGTAG +; +end; + + diff --git a/treeLike_JCGAMMA.py b/JC+GAMMA/test3.py similarity index 61% rename from treeLike_JCGAMMA.py rename to JC+GAMMA/test3.py index 6a777cf..6cded7e 100644 --- a/treeLike_JCGAMMA.py +++ b/JC+GAMMA/test3.py @@ -1,6 +1,6 @@ ################################## # This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model +# topology, and computes log-likelihood of the topology under Jukes Cantor model ################################### @@ -10,17 +10,6 @@ from itertools import chain from scipy.stats import gamma from math import exp, log - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -alpha = 0.5 #gamma shape parameter for rate categories -########################################################################################## - - - - - class node(object): def __init__(self, ndnum): # initialization function self.rsib = None # right sibling @@ -31,7 +20,120 @@ def __init__(self, ndnum): # initialization function self.descendants = set([ndnum]) # set containing descendant leaf set self.partial = None # will have length 4*npatterns + def allocatePartial(self, patterns, rates): + if self.number > 0: + npatterns = len(patterns) +# print 'npat', npatterns + self.partial = 
[0.0]*(4*4*npatterns) +# print len(self.partial) + for i,pattern in enumerate(patterns.keys()): + base = pattern[self.number-1] + for l in range(4): + if base == 'A': + self.partial[i*16+l*4 + 0] = 1.0 + elif base == 'C': + self.partial[i*16+l*4 + 1] = 1.0 + elif base == 'G': + self.partial[i*16+l*4 + 2] = 1.0 + elif base == 'T': + self.partial[i*16+l*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: +# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] +# rt = [2.89442785] + +# rt = [1.0, 1.0, 1.0, 1.0] + npatterns = len(patterns) +# print 'patterns=', patterns + self.partial = [0.0]*(4*4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): +# print i, pattern, patterns.keys() + m_list = [] + num_pattern = patterns[pattern] +# print num_pattern + + for l,m in enumerate(rates): + + + psame = (0.25+0.75*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + pAA = psame*(self.lchild.partial[i*16+l*4 + 0]) + pAC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pAG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pAT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pAA2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pAC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pAG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pAT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + self.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild + + + ###################################################### + + pCA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pCC = psame*(self.lchild.partial[i*16+l*4 + 1]) + pCG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pCT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pCA2 = 
pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pCC2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pCG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pCT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + self.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### + # + pGA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pGC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pGG = psame*(self.lchild.partial[i*16+l*4 + 2]) + pGT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pGA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pGC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pGG2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pGT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + self.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pTC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pTG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pTT = psame*(self.lchild.partial[i*16+l*4 + 3]) + + pTA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pTC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pTG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pTT2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + self.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild + + site_like = (sum(self.partial[i*16:i*16+16]))*0.25*0.25 + site_log_like = (log(site_like))*num_pattern + like_list.append(site_log_like) + log_like = sum(like_list) + return log_like def __str__(self): @@ -51,131 +153,16 @@ def __str__(self): parstr = '%d' % self.par.number return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, 
lchildstr, rsibstr, parstr, descendants_as_string) - - -def allocatePartial(node, patterns, rates): - if node.number > 0: - npatterns = len(patterns) -# print 'npat', npatterns - node.partial = [0.0]*(4*4*npatterns) -# print len(node.partial) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - for l in range(4): - if base == 'A': - node.partial[i*16+l*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*16+l*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*16+l*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*16+l*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: -# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] -# rt = [2.89442785] - -# rt = [1.0, 1.0, 1.0, 1.0] - - npatterns = len(patterns) -# print 'patterns=', patterns - node.partial = [0.0]*(4*4*npatterns) - like_list = [] - for i,pattern in enumerate(patterns.keys()): -# print i, pattern, patterns.keys() - m_list = [] - num_pattern = patterns[pattern] -# print num_pattern - - for l,m in enumerate(rates): - - - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild - - - ###################################################### - - pCA = 
pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) - - pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 - site_log_like = (log(site_like))*num_pattern - like_list.append(site_log_like) - log_like = sum(like_list) - return log_like - def 
treenewick(): script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) +# print script_dir + path = os.path.join(script_dir, 'tree.tre') with open(path, 'r') as content: newick = content.read() return newick -# +a = treenewick() + def gammaRates(alpha): bounds = [0.0, 0.25, 0.50, 0.75, 1.] @@ -191,7 +178,7 @@ def gammaRates(alpha): def prepareTree(postorder, patterns, rates): likelihood_lists = [] for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns, rates)) + likelihood_lists.append(nd.allocatePartial(patterns, rates)) print 'log-likelihood of the topology =', likelihood_lists[-1] @@ -319,8 +306,6 @@ def readnewick(tree): post = pre[:] post.reverse() - for nd in post: - print nd.number, nd.edgelen return post def Makenewick(pre): @@ -407,7 +392,7 @@ def calcExpectedHeight(num_species, mu_over_s): # print ' newick: ',newick[0] alpha = 0.5 ### gamma shape parameter rate categories -# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' + yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' rates_list = gammaRates(alpha) - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) + postorder = readnewick(yuletree) + result = prepareTree(postorder, readSeq.patterns(), rates_list) \ No newline at end of file diff --git a/JC+GAMMA/tree.tre b/JC+GAMMA/tree.tre new file mode 100644 index 0000000..2290a90 --- /dev/null +++ b/JC+GAMMA/tree.tre @@ -0,0 +1 @@ +(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112) \ No newline at end of file 
diff --git a/treeLike.py b/JC+GAMMA/treeLike.py similarity index 97% rename from treeLike.py rename to JC+GAMMA/treeLike.py index 1142e5e..c3007fa 100644 --- a/treeLike.py +++ b/JC+GAMMA/treeLike.py @@ -9,13 +9,6 @@ import re, os, itertools, sys, glob from itertools import chain from math import exp, log - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -########################################################################################## - - class node(object): def __init__(self, ndnum): # initialization function self.rsib = None # right sibling @@ -147,7 +140,7 @@ def allocatePartial(node, patterns): def treenewick(): script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) + path = os.path.join(script_dir, 'tree.tre') with open(path, 'r') as content: newick = content.read() return newick @@ -371,4 +364,4 @@ def calcExpectedHeight(num_species, mu_over_s): # yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file)) \ No newline at end of file + result = prepareTree(postorder, readSeq.patterns()) \ No newline at end of file diff --git a/brnlenMCMC.py b/brnlenMCMC/brnlenMCMC.py similarity index 99% rename from brnlenMCMC.py rename to brnlenMCMC/brnlenMCMC.py index 87c6160..b63467a 100644 --- a/brnlenMCMC.py +++ b/brnlenMCMC/brnlenMCMC.py @@ -17,8 +17,8 @@ tree_file_name = 'tree.tre' sequence_file = 'example3.nex' alpha = 0.5 #gamma shape parameter for rate categories -n_gen = 50000 -save_every = 50 +n_gen = 4 +save_every = 1 mean_expo = 10. 
#mean_expo = mean of exponential distribution for branch length prior ########################################################################################## @@ -89,7 +89,6 @@ def allocatePartial(node, patterns, rates): for i,pattern in enumerate(patterns.keys()): m_list = [] num_pattern = patterns[pattern] - for l,m in enumerate(rates): psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) @@ -161,7 +160,6 @@ def allocatePartial(node, patterns, rates): pfromT_lchild = pTA+pTC+pTG+pTT pfromT_rchild = pTA2+pTC2+pTG2+pTT2 node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 site_log_like = (log(site_like))*num_pattern like_list.append(site_log_like) @@ -171,7 +169,9 @@ def allocatePartial(node, patterns, rates): def mcmcbrn(postorder, patterns, rates): - nodes = readnewick(treenewick()) + nodes = postorder +# nodes = readnewick(treenewick()) + mcmc = 0 output = os.path.join('brnlenMCMC_results.txt') newf = open(output, 'w') diff --git a/readSeq.py b/brnlenMCMC/readSeq.py similarity index 100% rename from readSeq.py rename to brnlenMCMC/readSeq.py diff --git a/brnlenMCMC/readtree.py b/brnlenMCMC/readtree.py new file mode 100644 index 0000000..a7b4943 --- /dev/null +++ b/brnlenMCMC/readtree.py @@ -0,0 +1,22 @@ +# def treenewck(): +# script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# path = os.path.join(script_dir, 'nexus') +# for filename in glob.glob(os.path.join(path, '*.tre*')): +# f = open(filename, 'r').read() + +import re, os, glob, itertools, fnmatch, sys, shutil +# dirname, filename = os.path.split(os.path.abspath(__file__)) +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# print script_dir + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick +a = treenewick() +print a + + +# dirname, filename = os.path.split(os.path.abspath(__file__)) +# print "running from", dirname +# print 
"file is", filename \ No newline at end of file diff --git a/tree.tre b/brnlenMCMC/tree.tre similarity index 100% rename from tree.tre rename to brnlenMCMC/tree.tre diff --git a/mergeSeq.py b/mergeSeq.py deleted file mode 100644 index c28404a..0000000 --- a/mergeSeq.py +++ /dev/null @@ -1,453 +0,0 @@ -import re, os, glob, itertools, fnmatch, sys, shutil -from capture_marglik_pol import capture_script - -from itertools import combinations - -script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) -path = os.path.join(script_dir, 'nexus') -#path = '/Users/suman/Documents/Postdoctoral_projects/Algae_data/two_tupules_run/Mar4/combo_script/nexus' - -first_combo = 700 -last_combo = 770 - -# create output directory, recursively deleting it if it already exists after asking permission -output = 'g-700-770' -if os.path.exists(output): - print 'Directory "%s" exists' % output - answer = raw_input('delete it (y/n)?') - if answer in ['y','yes','Y','Yes','YES']: - shutil.rmtree(output) - else: - sys.exit('Aborting because you did not answer "y" or "yes"') -os.mkdir(output) - -number_to_merge = 2 - -#### counting number of genes to be analyzed -number_of_genes = len(fnmatch.filter(os.listdir(path), '*.nex')) -number_gen = range(1, number_of_genes+1) - -#### counting number of nexus files to be written after combinations -combos = list(itertools.combinations(number_gen, number_to_merge)) -number_of_files = len(combos) -# print number_of_files - -# chunks are pieces of the chloroplast that have not been rearranged -# The largest chunk (1) has 7 genes, the smallest (16-23) have only 1 gene each -chunk_map = { -'rpl23':1, -'rpl2' :1, -'rps19':1, -'rps14':1, -'atpA' :1, -'psbI' :1, -'cemA' :1, -'psbE' :2, -'rps9' :2, -'ycf3' :2, -'rpl36':2, -'petD' :2, -'rpoA' :2, -'psbJ' :3, -'atpI' :3, -'psaJ' :3, -'rps12':3, -'rpl16':4, -'rpl14':4, -'rpl5' :4, -'rps8' :4, -'rpoBa':5, -'rpoBb':5, -'rps3' :5, -'rpoC2':5, -'psaB' :6, -'ccsA' :6, -'psbZ' :6, -'psbM' :6, -'psbF' :7, -'psbL' :7, 
-'petG' :7, -'psbB' :8, -'psbT' :8, -'rps4' :9, -'clpP' :9, -'rps18':10, -'petB' :10, -'rps7' :11, -'atpE' :11, -'atpH' :12, -'atpF' :12, -'psbH' :13, -'psbK' :13, -'psaC' :14, -'petL' :14, -'rbcL' :15, -'rps11':15, -'rpl20':16, -'tufA' :17, -'psaA' :18, -'psbD' :19, -'atpB' :20, -'psbA' :21, -'psbC' :22, -'psbN' :23 -} - -##### nexus file for each merged pair - -new_way = True -genes = [] -data = {} -gene_javascript_data = [] -print 'Reading nexus files...' -for filename in glob.glob(os.path.join(path, '*.nex')): -# print 'Reading file:',filename - #pol m = re.match('(.+)[.]nex', os.path.basename(filename)) - m = re.match('(.+)[-]stripped[.]nex', os.path.basename(filename)) - gene_name = m.group(1) - - genes.append(gene_name) - f = open(filename, 'r').read() - m = re.search('ntax\s*=\s*(\d+)', f, re.M | re.S) - ntax = int(m.group(1)) - m = re.search('nchar\s*=\s*(\d+)', f, re.M | re.S) - nchar = int(m.group(1)) - - gene_javascript_data.append((chunk_map[gene_name], gene_name, nchar)) - - m = re.search('Matrix\s+(.+?);', f, re.M | re.S) - matrix = m.group(1).strip() - - matrix_lines = matrix.split('\n') - - taxon_names = [] - sequences = {} - for line in matrix_lines: - parts = line.strip().split() - assert len(parts) == 2 - taxon_name = parts[0] - sequence = parts[1] - taxon_names.append(taxon_name) - sequences[taxon_name] = sequence - - if not new_way: - if len(data) == 0: - for t in taxon_names: - data[t] = [] - - for t in taxon_names: - if new_way: - group = data.setdefault(t, []) - group.append(sequences[t]) - else: - data[t].append(sequences[t]) - -print 'Found %d genes' % len(genes) -for c,g,n in sorted(gene_javascript_data): - print ' {name:"%s", chunk:%d, seqlen:%d},' % (g,c,n) - -taxa_sorted = sorted(data.keys()) -# print "taxa_sorted=", taxa_sorted - -number_of_genes = len(genes) -number_gen = range(1, number_of_genes+1) - -#### counting number of nexus files to be written after combinations -combos = list(itertools.combinations(number_gen, 
number_to_merge)) -number_of_combos = len(combos) -print 'Number of combinations:',number_of_combos - -combo_list=[] -current_combo = 0 -for a,b in combos: - current_combo += 1 - if current_combo <= first_combo or current_combo > last_combo: - continue - print 'current_combo=', current_combo-1 - - new_taxonlist = [] - se1 = [] - se2 = [] - new_sequences = [] - seq1 = [] - seq2 = [] - genes_merged = (genes[a-1]+'_'+genes[b-1]) -# print 'genes[a-1]==', genes[a-1] -# print 'genes[b-1]==', genes[b-1] -# - for t in taxa_sorted: - new_taxonlist.append((t,data[t][a-1],data[t][b-1])[0]) - new_sequences.append((data[t][a-1]+data[t][b-1])) - seq1.append((data[t][a-1])) - seq2.append((data[t][b-1])) -# print 'new_taxonlist==', new_taxonlist - ntax = len(new_taxonlist) - newnchar = len(new_sequences[0]) - nchar_gene1 = len(seq1[0]) - nchar_gene2 = len(seq2[0]) -# print 'nchar_gene1====', nchar_gene1 -# print 'nchar_gene2====', nchar_gene2 - - new_nexus_file = '%s.nex' % genes_merged - gene_dir = os.path.join(output, '%s' % (genes_merged)) - if not os.path.exists(gene_dir): - os.mkdir(gene_dir) - - combo_list.append(genes_merged) - - mrbayes_single_run1 = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -partition mine = 3: first, second, third; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) 
statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1) - - mrbayes_single_run2 = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -partition mine = 3: first, second, third; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene2, nchar_gene2, nchar_gene2) - - - mrbayes_concat_topo_brlen = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 
2-%d\\3; -charset third = 3-%d\\3; -charset fourth = %d-%d\\3; -charset fifth = %d-%d\\3; -charset sixth = %d-%d\\3; -partition mine = 6: first, second, third, fourth, fifth, sixth; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) ratepr=variable revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1, nchar_gene1+1, newnchar, nchar_gene1+2, newnchar, nchar_gene1+3,newnchar) - - mrbayes_concat_topo = '''begin mrbayes; -delete Ankyra_judai Atractomorpha_echinata Bracteacoccus_aerius Bracteacoccus_minor Chlamydomonas_reinhardtii Chromochloris_zofingiensis Dunaliella_salina Floydiella_terrestris Gonium_pectorale Kirchneriella_aperta Mychonastes_homosphaera Oedogonium_cardiacum Ourococcus_multisporus Pleodorina_starrii Pseudomuriella_schumacherensis Rotundella_rotunda Schizomeris_leibleinii Stigeoclonium_helveticum Volvox_carteri; -charset first = 1-%d\\3; -charset second = 2-%d\\3; -charset third = 3-%d\\3; -charset fourth = %d-%d\\3; -charset fifth = %d-%d\\3; -charset sixth = %d-%d\\3; -partition mine = 6: first, second, third, fourth, fifth, sixth; -set partition=mine; -lset applyto=(all) nst=6 ngammacat=4 rates=gamma; -prset applyto=(all) statefreqpr=Dirichlet(1.0,1.0,1.0,1.0) revmatpr=dirichlet(1,1,1,1,1,1) brlenspr=Unconstrained:GammaDir(1.0,0.100,1.0,1.0) shapepr=exponential(1.0); -unlink shape=(all) statefreq=(all) revmat=(all) brlens=(all); -set seed=9223 swapseed=9223; -mcmcp ngen=8000000 samplefreq=500 printfreq=8000000 starttree=random nruns=1 nchains=1 savebrlens=yes filename=ss; -[sumt 
filename=mcmc;] -[mcmcp filename=ss;] -ss alpha=0.3 nsteps=30 burninss=-2; -end; -'''% (nchar_gene1, nchar_gene1, nchar_gene1, nchar_gene1+1, newnchar, nchar_gene1+2, newnchar, nchar_gene1+3,newnchar) - - -#concat_topo -###########submitallscript########### - submitallscript ='''cd %s; qsub qsub.sh; cd .. -cd %s; qsub qsub.sh; cd .. -cd concat_topo_brlen; qsub qsub.sh; cd .. -cd concat_topo; qsub qsub.sh; cd .. -'''%(genes[a-1], genes[b-1]) - submitall = os.path.join(gene_dir, 'submitall.sh') - newf = open(submitall, 'w') - newf.write(submitallscript) -###########submitallscript########### - - -###########qsubscript for concatenated set########### -# qsubscript = '''#!/bin/bash -# #$ -S /bin/bash -# #$ -cwd -# #$ -N concatenated -# #$ -q highpri.q,highmem.q -# mb concatenated.nex -# rm -f mcmc.* ss.*''' -###########qsubscript for concatenated set########### - - -###########gene1########### - single_gene_dir = os.path.join(gene_dir, genes[a-1]) - if not os.path.exists(single_gene_dir): - os.mkdir(single_gene_dir) - output_path = os.path.join(single_gene_dir, ('%s.nex' %(genes[a-1]))) - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, nchar_gene1)) - newf.write(' Format datatype=dna missing=? 
gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, seq1): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_single_run1) - newf.close() - ##qsub_script##### - qsub = os.path.join(single_gene_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -mb %s.nex -rm -f mcmc.* ss.*'''%(genes[a-1], genes[a-1])) - ##qsub_script##### -###########gene1########### - - -###########gene2########### - single_gene_dir = os.path.join(gene_dir, genes[b-1]) - if not os.path.exists(single_gene_dir): - os.mkdir(single_gene_dir) - output_path = os.path.join(single_gene_dir, ('%s.nex' %(genes[b-1]))) - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, nchar_gene2)) - newf.write(' Format datatype=dna missing=? 
gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, seq2): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_single_run2) - newf.close() - ####qsub_script##### - qsub = os.path.join(single_gene_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -mb %s.nex -rm -f mcmc.* ss.*'''%(genes[b-1], genes[b-1])) - ####qsub_script##### -###########gene2########### - - -###########concat_topo_brlen########### - concat_dir = os.path.join(gene_dir, 'concat_topo_brlen') - if not os.path.exists(concat_dir): - os.mkdir(concat_dir) - output_path = os.path.join(concat_dir, 'concatenated.nex') - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, newnchar)) - newf.write(' Format datatype=dna missing=? 
gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, new_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_concat_topo_brlen) - newf.close() - ####qsub_script concat_topo_brlen##### - qsub = os.path.join(concat_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s_%s_concat_topo_brlen -#$ -q highpri.q,highmem.q -mb concatenated.nex -rm -f mcmc.* ss.*'''%(genes[a-1],genes[b-1])) - ####qsub_script concat_topo_brlen##### -###########concat_topo_brlen########### - - -###########concat_topo########### - concat_dir = os.path.join(gene_dir, 'concat_topo') - if not os.path.exists(concat_dir): - os.mkdir(concat_dir) - output_path = os.path.join(concat_dir, 'concatenated.nex') - newf = open(output_path, 'w') - newf.write('#NEXUS\n\n') - newf.write('Begin data;\n') - newf.write(' Dimensions ntax=%d nchar=%d;\n' % (ntax, newnchar)) - newf.write(' Format datatype=dna missing=? 
gap=-;\n') - newf.write(' Matrix\n') - longest_taxon_name = max([len(t) for t in new_taxonlist]) - for t,s in zip(new_taxonlist, new_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n\n') - newf.write(mrbayes_concat_topo) - newf.close() - ####qsub_script concat_topo##### - qsub = os.path.join(concat_dir, 'qsub.sh') - newf = open(qsub, 'w') - newf.write('''#!/bin/bash -#$ -S /bin/bash -#$ -cwd -#$ -N %s_%s_concat_topo -#$ -q highpri.q,highmem.q -mb concatenated.nex -rm -f mcmc.* ss.*'''%(genes[a-1],genes[b-1])) - ####qsub_script concat_topo##### -###########concat_topo########### - - -####### go.sh ######### -jobscript = os.path.join(output, 'go.sh') -newf = open(jobscript, 'w') -newf.write('#!/bin/bash\n\n') -for i in combo_list: - newf.write('cd %s\n. submitall.sh\ncd ..\n\n' %(i)) -newf.close() - -####### capture_marg.py ######### -jobscript2 = os.path.join(output, 'capture_marg.py') -newf = open(jobscript2, 'w') -newf.write(capture_script) -newf.close() - - - diff --git a/randomSeq.py b/randomSeq.py deleted file mode 100644 index 4925a91..0000000 --- a/randomSeq.py +++ /dev/null @@ -1,211 +0,0 @@ -import sys, re, os, random - - -# number1 = raw_input('number_of_runs? > ') -# num_run = int(number1) -# #print num_run -# -# ###specifying number of taxa to be generated -# number2 = raw_input('number_of_taxa? > ') -# ntax = int(number2) -# -# ###specifying number of sites to be generated -# number3 = raw_input('number of sites? > ') -# num_sites = int(number3) -# -# ###specifying number of dna matrix to be generated -# number4 = raw_input('number of genes? 
> ') -# number_genes = int(number4) - - - -num_run = 1 -#print num_run - -###specifying number of taxa to be generated -ntax = 6 - -###specifying number of sites to be generated -num_sites = 10000 - -###specifying number of dna matrix to be generated -number_genes = 10 - -num_trials_pergene = 2 - - -for i in range(num_run): - run_name = i+1 - run_name2 = 'trial'+ str(run_name) - print run_name2 - master_dir = os.path.join(os.path.abspath(os.curdir), run_name2) - if not os.path.exists(master_dir): - os.mkdir(master_dir) - - - ###creating a list for the directories for each randomly generated dna matrix - all_seq = [] - sets = [] - for a, b in enumerate(range(number_genes)): - a += 1 - set = "gene"+ str(a) - sets.append(set) - - ### creating dna matrix with randomly generated sequences - for n, i in enumerate(sets): - ### specifying names of the directory and dna matrix - folder_name = i - n += 1 - new_nexus_file = 'randomseq'+ str(n)+'.nex' - new_nexus_file2 = 'randomseq'+ str(n) - ### creating a list of randomly generated sequences - sequences = [] - for i in range(ntax): - myrandom = [] - for i in range(num_sites): - i = random.randint(1,4) - myrandom.append(i) - dna_list = [] - for m in myrandom: - if m == 1: - m= "A" - if m ==2: - m= "T" - if m ==3: - m= "G" - if m == 4: - m= "C" - dna_list.append(m) - dna_list2 = ''.join(dna_list) - #print dna_list2 - sequences.append(dna_list2) - all_seq.append(sequences) - - - - ### creating a list of taxon_names - taxon_names = [] - k=0 - for i, m in enumerate(sequences): - k += 1 - taxon = "taxon"+ str(k) - taxon_names.append(taxon) - - - ### creating a directory - new_dir = os.path.join(master_dir,folder_name) - if not os.path.exists(new_dir): - os.mkdir(new_dir) - gene_sets = [] -# for i in range(3): - for i in range(num_trials_pergene): - m= i+1 - seed_num = 4648+m - m2 = 'run'+str(m) - gene_sets.append(m2) - new_subdir = os.path.join(new_dir,m2) - if not os.path.exists(new_subdir): - os.mkdir(new_subdir) - - 
bash_filename = 'qsub.sh' - bash_file_content = '''#$ -S /bin/bash -#$ -cwd -#$ -N %s -#$ -q highpri.q,highmem.q -python runphycas.py -/common/galax/rungalax.sh --treefile trees.t --skip 1 -'''% (new_nexus_file2) - -# # ### saving dna matrix to the directory -# - full_path1 = os.path.join(new_subdir, new_nexus_file) - newf = open(full_path1, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write(' format datatype=dna missing=? gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path2 = os.path.join(new_subdir, bash_filename) - newf = open(full_path2, 'w') - newf.write(bash_file_content) - newf.close() - - full_path8 = os.path.join(new_subdir, 'runphycas.py') - newf = open(full_path8, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(int(seed_num), new_nexus_file)) - -# -# -# - combined_sequences = map(''.join, zip(*all_seq)) - num_sites_combined = len(combined_sequences[0]) - new_dir2 = os.path.join(master_dir,'combinedSeq') - if not os.path.exists(new_dir2): - os.mkdir(new_dir2) - - full_path3 = os.path.join(new_dir2, 'combinedSeq.nex') - newf = open(full_path3, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write(' dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites_combined)) - newf.write(' format datatype=dna missing=? 
gap=-;\n') - newf.write(' matrix\n') - longest_taxon_name = max([len(t) for t in taxon_names]) - for t,s in zip(taxon_names, combined_sequences): - formatstr = '%%%ds' % longest_taxon_name - namestr = formatstr % t - newf.write(' %s %s\n' % (namestr, s)) - newf.write(';\n') - newf.write('end;\n') - newf.close() - - full_path4 = os.path.join(new_dir2, bash_filename) - newf = open(full_path4, 'w') - newf.write(bash_file_content) - newf.close() - - - - full_path6 = os.path.join(new_dir2, 'runphycas.py') - newf = open(full_path6, 'w') - x = open('runphycas.py', 'r').read() - newf.write(x %(4648, 'combinedSeq.nex')) - - - full_path7 = os.path.join(master_dir, 'submitall.sh') - newf = open(full_path7, 'w') - newf.write('#!/bin/bash\n') - for i in sets: - for m in gene_sets: - newf.write('cd %s/%s; qsub qsub.sh;cd .. ;cd .. ; \n' %(i, m)) - newf.write('cd %s; qsub qsub.sh; cd ..\n' %('combinedSeq')) - newf.close() - - full_path9 = os.path.join(master_dir, 'treelist.txt') - newf = open(full_path9, 'w') - for i in sets: - for m in gene_sets: - newf.write(i+'/'+m+'/trees.t\n') - newf.close() - - - - full_path10 = os.path.join(master_dir, 'galax.sh') - newf = open(full_path10, 'w') - newf.write('#!/bin/bash\n') - newf.write('/common/galax/rungalax.sh --listfile treelist.txt --skip 1') - newf.close() - - - - diff --git a/simSequences.py b/simSequences.py deleted file mode 100644 index 4181da1..0000000 --- a/simSequences.py +++ /dev/null @@ -1,174 +0,0 @@ -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from math import exp, log -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 
4*npatterns - self.state = None - self.states = None - - - def simulateSequences(self, num_sites): - self.states = [str]*(num_sites) - freq = [0.25, 0.25, 0.25, 0.25] - current_states = [ 'A', 'C', 'G', 'T'] - if self.par is None: - - for i in range(num_sites): - - ran_nm = random.random() - if ran_nm < freq[0]: -# self.state = 'A' - self.states[i] = 'A' - elif ran_nm <= freq[0]+freq[1]: -# self.state = 'C' - self.states[i] = 'C' - elif ran_nm <= freq[0]+freq[1]+freq[2]: -# self.state = 'G' - self.states[i] = 'G' - - else: -# self.state = 'T' - self.states[i] = 'T' - - else: - for m in range(num_sites): - prob = [] - ran_nm = random.random() - - for i in current_states: - if self.par.states[m] == i: - p = (0.25+0.75*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - else: - p = (0.25-0.25*exp(-4.0*(self.edgelen)/3.0)) - prob.append(p) - for i in prob: - - if ran_nm <= prob[0]: -# self.state = 'A' - self.states[m] = 'A' - - elif ran_nm <= prob[0]+ prob[1]: -# self.state = 'C' - self.states[m] = 'C' - - elif ran_nm <= prob[0]+ prob[1]+ prob[2]: -# self.state = 'G' - self.states[m] = 'G' - - else: -# self.state = 'T' - self.states[m] = 'T' - - return self.states - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - -def simulate(preorder, ntax, num_sites, out): - newf = open(out, 'w') - newf.write('#nexus\n\n') - newf.write('begin data;\n') - newf.write('dimensions ntax=%d nchar=%d;\n' % (ntax, num_sites)) - newf.write('format 
datatype=dna missing=? gap=-;\n') - newf.write('matrix\n') - master = {} - for nd in preorder: - master[nd.number]=nd.simulateSequences(num_sites) - if nd.number >0: - newf.write('%s %s\n' % (nd.number, ''.join(nd.simulateSequences(num_sites)))) - newf.write(';\n') - newf.write('end;') -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - i += 1 - - return pre - -if __name__ == '__main__': - - output_filename = os.path.join('simulated_output.nexus') - - yuletree = '(5:1.86010,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896)' - - preorder = readnewick(yuletree) - ntax = 5 - num_sites = 100 - result = simulate(preorder, ntax, num_sites, output_filename) -