diff --git a/readSeq.py b/JC+GAMMA/readSeq.py similarity index 95% rename from readSeq.py rename to JC+GAMMA/readSeq.py index 91c58a5..6453201 100644 --- a/readSeq.py +++ b/JC+GAMMA/readSeq.py @@ -1,4 +1,4 @@ -def patterns(sequence_file): +def patterns(): # import re, os, glob, itertools, fnmatch, sys, shutil from itertools import combinations @@ -10,7 +10,7 @@ def patterns(sequence_file): genes = [] data = {} # print 'Reading nexus files...' - for filename in glob.glob(os.path.join(script_dir, sequence_file)): + for filename in glob.glob(os.path.join(script_dir, '*.nex')): m = re.match('(.+).nex', os.path.basename(filename)) gene_name = m.group(1) diff --git a/JC+GAMMA/readSeq.pyc b/JC+GAMMA/readSeq.pyc new file mode 100644 index 0000000..157ab98 Binary files /dev/null and b/JC+GAMMA/readSeq.pyc differ diff --git a/readtree.py b/JC+GAMMA/readtree.py similarity index 100% rename from readtree.py rename to JC+GAMMA/readtree.py diff --git a/JC+GAMMA/simulated.nex b/JC+GAMMA/simulated.nex new file mode 100644 index 0000000..af9f492 --- /dev/null +++ b/JC+GAMMA/simulated.nex @@ -0,0 +1,20 @@ +#nexus + +begin data; +dimensions ntax=10 nchar=10000; +format datatype=dna missing=? gap=-; +Matrix +1 CTTTCTGGGTGACGTCGCGCGAAGCTTTTGCCCTTAGCCCTGAAACATAGCGCGCAGCACATGCATGTCACCGGTTTGTCAACCCGCCAGTGCTACTCAATGGGTCATATCGGGCATGATCCGCCTCCCTTCCCGGCCATATTAACGGTCTTGAACCTCATTTTCGCGGTACGCGCGTCCACGTTGTGATTGACAAGCCTCCATGCGGGAGCACCTAACATACGTTAGCCTGATCCATGGGACGTAAACTGCACTACATCAGTTTTGTGGGTCTCAGTAGGCGGCTCTGACAGTAGATTCCCTGCAGTACAGATCTCTACGCAACACATAGAACTAGGTTGCTGGAAGTTGAAAACGTTTCGACTGCCTGTACAAAAAATTCGTCGGCAATACAAGTACCTGTAGTTGCCTTGATTGCTCTCAGGTCACATAGGCTTGATTTTAACCAGACTGACGAGTGGGGGCCTCGACGCTGCTATAAAGTGATACATAAGCTCGGAGCAACAAATCTTCAATCGTTTACGAGGGGGGGCCGGCGCAAGCGCGATCACTCTTCCTGTGCCGTATATAGCTCGGATAGGCCCAGGCGGCTAACTAATCCGGTTACAATCAGTATATAGGCAGGGCACGGCACGTGCGGCCCGTGAAACGAAAAATACCTCAATGCCATGGCACTCCTATCTTTGAGGCAACTCGCTGTATGCTATTTTGACAACTCCTCCACCCTAAAAACCTCTTCAACGTGAAACCGTTGCAATCGTAGAAGTTCTCTGACTGTGATGAGAATGCCCTGGTGCAACCGTTTGACTAAATCATACGGTGCTGAAACTGCACATGAACCTCTGGTATCGGAAAGGCGAGGAACACTCGGTATTCGCGCAGTGGAATCGTCCCATATCTATATAGTTAGGCTGAATAGAAAACGCGGATACTCTTTGTCCCACTGAGATTGGCCGGATGCAAGCGTACTCTTAAAGATTTACGCATCTAACTGTCTCTACCGGGGGATATCCCCACACTAACTTTGACAAGGACGTCTAGACCTAAGTAGGGATCTCCGAACTGTCCTTCCTCTGTCCGTCAATCCCGGCATCCACTCTAACGCCGACAAGCTTAAAATTGATATAATAGCGAACCCGGAATTAGTGTCGTCGCCTCGGGAGGCAGTTATAGATTAGTGATTAGCCACTATCGCGCCCGTTTGGACAAAAAAGACGCATAACGACCGCTGAGCGAGATAACATATGCTGCGCCCGACGCGCGAACGTGCCATAGCCTGCAACACTATCGTGGTAGTCCGTATCCCTTCCGTGATCCCCTGGGCCACTTACCCGACTACCTCCGATGTGTCCCCCCTCGTGCAAGACACCGTCCATAGGCACGTTCTGGATTGCTCCGTACGACCCGTAAAAATCAGTGGGACGTCCCTAGAGGCAGACATTCCGGGACACGGTTTCTAGAGCGAAAGGGGGTTGTTCTGGAAGAGAAAATAGGGACCTCATATAAGTAGGAATAAACGGTCGACGTCCGAATGTGGTCACCTTGCTTTAGACGCTGTTGTCACTTAGCAAGTATGAGATGGACGCGAAGTGGTCAACTACCCGGTTTCCCCTGACGGGGTACGTTGCGAAGGGAACTCTGCTGTAAGCAACAGCTGTGGCGTTAAAGGCAAAGGCGAAGAGTGGTAGAACGTACCTCGTTCATCGACTTACCGAGGTTTTCTAATTCTTTGCAATCATGCTTATTGAGGTTAAAGAGTACTCACCCGTTGTACTCCGGTTTTTCCTGTAATGTTTCCAAATGGTTAATGAAACTAGCCGCTGCGCCGGCCTAGTGATCCCGAGCCTGGCAACAGGGAACAACCTTAACGGGTGACCAGTGGGGTGTCTCTGTGGATATAGTCACCCGCCTAACACGCCAGTGATGGAGTTTCTAGCTAGAGACATCTTCGGCAGCTGGTATAAGTATGTGATCTTGAGTGCGGTAATTACGAAGAGTTGCGGAGGAATCCGTGCGTTGCCTTTAGGGTTCATCGAGAGCCCTACCTCCAATTGTATCTTACGCGTCCCGCGGGTTTGTCAACCTACGCAGCCGTTCATTCTCGGTCGACGACGGATATAGGCTGGAAAATTATCGAGAGGCTGGCCCAACTGAGCTCTCGTCACATAGGACTCGGAAGATGCGTGGATGAGCTTAGACTTAGCGAAGTAGCTGCCTTGTTGCCGTCAAGGATATTGCATCACGCAAATACTACGAGACACCCGTACTCCAACCGATCGATACTTGAAGATTTGTCGAAGGTCAAGAAGATCATACACCTCCTGGGGCACTAAGCCGGGGATTAAAGAGCTGGCAAATAACTCACCATTAAGCGATGAGCGGCCCAGGTCCTCCCTAACGTTGATGGTCTGGGGGTTGGGTGAACAATTTGGTCCACGGGGACTTTTGTCGGTTTTCTCAATAGGAAAACTCACCCACGTTAACGCTAGCCACTACCAAATATGCGGGATTTACCTGCGCTGTGGGTAGCCAATAGATGCACGAGCACCGTATCGAACCATCAAGCTCAAATCGTTCGAACAAGGGAAGTGTAGCGTTGAGCTAGTCCATCTAATCATCAATATACAATAGCCGGCAATCAACTGGCCTTAGCGGTCAGACAGTCGAGGCTGGTATTCGCATGTCAATCGAGGTCAGACCACGAGGTTGCACTGTTCTGCTCAGCGGTATGGATCAACATGCGATGTGCGCCATTCGTAGTGCCGCTTGGCAAAATCAATTTCTCTCCCTGTGCTACGTATGTTCGCAACACCTTGGCCGTAGTCCGGGTTTTTTGACGGCTGGGAGTTCTCGCAATGGACATCCCCGGGTTCCCGCCACGCCTGGGGTCTACAGGGGGATCGCGGTCATGCTTCAGAATTCCGCCTACGTGTTCTAGCAATTATGAAATCAACCAATGGCCTTTTACATGTCGGTGCTCGAATGTGTGGAACGAAGTTGGCTTGGTGTATGGATCACCCCACTAATCTTTTAAGAGATCCGGCTTAAACCTCACTCCTCTTTGTTGGCGCAAGCATAGTTAAGAATTTGCTTTGTGCCTGTGCCATTACGATTGACATGGTGGGCCCCGTATAGGGTCGCCAAAAAATTGTGAAGGCGAAGAGTCAAATGCCTTGTTTGGAACTCCGGCCGGGTGAAGCGCACTCTTGGCGATTGGGATCCTGACCCGGATCCCCAGTTATGTTACTTGGTCGATTCGTTTCTGGTCTTTTCACGGCGCCCCTATCTGAGAAACTTAGTCCATCGTGCAATAGCACCTTACACGTTGTGTCGTTGCCGGTGCCGTTGTTTTATTTTGGGGGCCGAGGGTGTTCCCAGGTGGATCAGTACCGAAAGTCTGTCGATACTAAGGTTAAGGGGTAGACGGGGATTGTTTATTGACAACGGTTCGTCCTTCGGGATTGCTTGATGTGATAACGTCTGATTCGTCCAGTATATCGTAGGTCTCACCGCTCCACGGTACTGTCTGATTGAGAAGCTCGGATTTTCCTGTTGCGCTGAATCAACTTCGAGGTAGGCGCGCTCATGCGTACAGGTAAAGAATTTTGCCTAAAACGCCTGCACGCGCTATCATATCACTCTGAAATAGGTTTTCTCGCCCTGAAACTAAAGTCCGTTGACCACCCGGAGAATAGGGATGCCGTATTATTTAGGCGGGACCTCTGGTATGAAACTCTTAGCCTCGTTCTCCACGGTAGATGAGTCGGTTCTTCATAGCCAGTGTACCGATTGCGGCTTTATTTGATTGGCATACGGCGGAGTTCTTCGTAAGCAGCTTATGAGCGCACTGTGGTATAGTTAAATGGATGTTTCCCGTCTCCATGATGTTGCTGTTCCATAGCGCTACGGAATAGATTGCGACGGTCGCCTTGGGGATTACTCATGTGTCACTTTGAGGTGAGCGAAACGCACAAGGCCCCTTGGGGCGCTAATTTTATTAGGCCAATCATCCGGGTCGCTGAGAAATTATTCGATCACGTATGGAGGTCGTAGGTGTACAAATTAACCACCGGGCTAGCATACGATCACCAGATTTTACTCAAGACTAAACTTCTTTAATGTGGGTCTAAAGTACGAAATAACCCGTGGTCTATCAGGAAGTGCTGTTTTAACGTATAGCATCTAATTTTGGACGAGCCTAATGTCGCCCGTGACTTAATTAGTCACAATACGTACGTTATGACGCGTTGACGGCTCCTAAGGTAACCCCTAGCCATTCGAATCAGAGGAGATTGTAGGCTAGACCCTGCGTTGGTTGAGCGTCCACCTAAATCTTCAGGATGTGTTTAGTTATTTCGACCGAGTCCAATGTAGCGCCTCCGACGGCTTTTTTGAAATTCACTCCGTCTTGTTAATTGCCGTAGATGGCTCGAAAAAATATGCTGAAGGGGGGCTAGCATATTCGCTAAGCCTCATCTGGTGTGCCGAGTAACGGGTCCATATAAAAACCCCGCTCCTATTCGTTAATGGGAGCTGATCCGCGTCGTTATTGTTCAGCCTCCCTACCGCAGTGGGAATGACTAGTTAGAAAGAGTGAAGGATTATTACGGCCATGTCTATTTAAGTTGATCCATGGATATAGTAAGGCTATTAACTACGAATTGCAAGTTCACGCAATGAAAAGTAGCGCTTGTATCGCAAGTCATGGGCTAAGCTTAACGGTGACTCTATGACAGCGTAAATACTGGGCGCAAGTGGTTATCGATATCTCCTGTAATTAAGACATTGGAAGCCTTTGGGCTATAACCTACTAGCCCTGTGGTTTGTGCACATAGAGTTAGACGGCGAGCGCAGCCGATGCAATGATCAATGACTCGACGGCATTTCTCATCCAAAAGTCGCCAAACCGACACTCCACTAACAATTGCTAAGGAAAGAACGGGATCATGGTTTCGCCATTATCCGCATTGCGGATTCACGCCAGTTGTTATTATTTTCCATGAGTACGGGCCGGAGGCTAATACTTGAGCGGATGTACACGGTTGCGAGTTGGGGCTGCTTACGCCACTCGTATCATGCGCGGGCGCGGATCTATCGGTGGCTACGTAGTTGCTTAGACCTATAAGGTTAACGGCGACCTCGTCAAATCGTGGAATGCTACAGGACCCGCAACTGAAAACAAGCCGACACCGGACCAATCGAATAACGGACTATCGTGTGCATTTACTCGGTGGCAAGTTCGTACACTTACAGCGAGATATAAAACGATCTGGCGTTGACTTAATCCCAATTGGCCGAGCCGCAGACCAATTCGGCCTTCGAACGTATCACTTTGAGGCCAAGGATACAAGATCACCCGTCGTTTAAGCAGAGCCACCTAATGCTTTAAGAATATGGAGCCTATTCACCTTGTTATTGCGTCTGCCCATTCCCTACTATCGTTTTGGGAACTGAGCATTCGCCACCACAAGCTTCAGTGCCGACAGCACGAGAATGTACGTGGTGGCAAGTCTGAAATGAAAGGTACGTAGCGGTATCTACGCCCGGTCTCTGCTTGCTCTATACATTTGATCCATTGGCGGCGGCGACTGTAGCTGAGCCGAGGGATGTGCTATCGGTTCGCATTCAAAACCTGTCATAGCGCGCGACAGTAACTGGGTTCACCTCGTTATAATTGGGGGATACGAAGCCCGCCCTATAACATGCTAACAACGTAAATAGTGGAAATGTGCCGCACGTAAAGGACAATGGGAACACGGCAAAAAACGAACGGTCTTAGACACTGTTGCTGTGTAGGCTCTCCCCCTTTAATAATGTAGACTCCCTTGTCCGGGTAGCACAGCATCCAAGGTCCGTGGTCTTCTAGGCTAATTTCGTGAACCGCCGGCACTCATATACTGGTAATAGATTCGCGGTTCTGCGCCTAGGACATAAGGTCGGCGCGCAAACCCTGTCGCACTCGAGGATCACCTTAGTTGTTCCACATGAGACCGCTCCCAGTTAGAGTGTTTTGGAAGCCTGGATCATCTGGCATGTGCGCTTGGGGAACAAAGCCCACAGCAAGAACTTTAACAGCTGAGACTAGATGCAGAGGATTAATTGTCCCCTTAATCACTTACTTGAGGTGGATATCGCCGCAGATCTCTGGCCCAACCCTACGCCCAGCATCCAACTCTTGCAGGATTTATCCCTGTTCGGTGGATGACATGATCTGGTTCTATAATCCGTCCAGCGTAATGGACACGAAATGAATAGTACGGTTGAAGAAAGCCCCCAGGAGGGTGTAACACAGGTTACGCTCGTGTCGTCCTCCGTCAAAGAATGCTCCGCAGATCTAATCTACTACTAACAGCGCATTGACCGCAATTCACAGCTTCACGGGTTGAATCCGGTTATTCACGGTCCATTTATTTCATCGTGACCCCACCGGCCGCTAAGCTATAGGGCGGCGGAAAACGCTCCCAACCAGGTTGATGGGGGCCGCTATAGAGGATTATGCGATGGTGTTTGATTTATACCAACCCGTCCGGTTTTGTAATGGTTCCGATTGCACGCTCGGCGCAGTGGATAATACCTCGTAGCTCCGCCAATAGTGTTACTAAGTTATAGGGAAAGACGGCGAGCATGTGGTAGGCGGATAAGGTCCTCTAGAAGAGCTACGTAGCCGTGGTTCCCCGGGGCCGGACTCCGTGCCTTTAGCTTTTGACGTTTCGTTGCGTCCTGCATCGATTGTACCAACTCGACAGCCCAAATCATCAGCGAAACGGCGAGTCTATACCTATTACATACGCGGTAAGCGGACCCTCAACGAGCCAGCACGATGAAACATCAGTCTGTTCGAAGCGTCTCGTTTCTCCAGGTGAAGGTCGCACATATGAAAGATACAACATTGCTTCATTTTGCAATAGATGTGATGTCCACTGTAACCACCTATTGCCGATGCCTCATACGTTCGGTTACACCGAGATTACTGCGACTATCGGGTCGGCGAATATGCGATTCTCCAGTGGATCTTTCTGATACGCCCTTGAACGCGCATCTCCCTTGAACGAGGTAAAACATGGTTCAAATTGATTTGCTAATGAACTTTCTGAGTAAAGCATAAAATTGCCGTACCCGCAGCTCCCCCTATGAATGCTCTACGAATGGGGTAAAGCTCTATACTCCTCTGCGCCCGGCGATCCTAGACTTTGCAAGGAGCTGGCTAAGATGATAGCGAGGGTCCTTAAAGTAGCAGCTACCCATCGGTAGCGACAAACTCCGTCCCGTCGATCAGACCCGCGTCGCAACACGAATATAATATTCCACTTGCCCGTCCGCATTACGAGTCCGACCACCCGGAGAGAGAAGTGGGGTCAGCGAGGGGAACGTGTGTACACTTCCTCGATACTTCCTCTACAGTGAAGCATATCTCGCTCTAAGGCCAAATGCAACAAGCTCACCCACAAGGGTCTTGACTTCGCTGGGGCTTGCTGAGGGCCCTTACACTTTAGTACGAAACATGCGTCTGCGACAAATTGATGAAGGACCCTATGGTCCAGTTCACCCTTCAAAATCTACGGTCAGTGTCCGGACCTTCCGCCTGCTTCGCATAACATCCATAGTAACGATAGCTACCAAGTTAGTTTCCGGTCGTATGAGGGGATACGCGTGAAGTTAGAAACAAATGTGCGGTTCCCGCTATCAATCCCAAATACTGTAACCCTGAGTACCGTAGGCTGAGCGTCTACTCTAGTATCATCGGGCGTGCACTGTGATAAACGTTGGTGGAGCAACCTTTATCTGAACATATAAACTTCGCCACCGGCCTTACCTACCATCGTTCGTACATAGAACCCATTAGAATAGATGTTCTTTGAGGGAGTTACCATGTCACAGTTACACATAAGGACACTCCGGAGTTTTACAACTCAGGCTCGGTACCGACGGGAACCTGTGGAACTGCGTCTAACTCTGGAGCAGATCCGTAGTCCCGCGGACAGCGCACTGATCTTGACGCTAAGTGAAATTGGTAGCACGTCGTATTCCCGGCGTTTTCCAATATTGCAATCATCGTTTGCTCGGTGGAATCAGCGACGGAGGCGTATTTCCAGATATATTCAGCTCAAGGGAGTGGGTGGCTGCGATACGCATGCCGGATTTACACATCTGAGCATTAGGGAATAAGGTAGGACGATCAGCACTCTTGAGGTTCCGAACGTATTGGGATTCCGAACGAAAACACGAAGTTAACGCGGAATCAGTCATAGACTTCCCGAGTTGCCAGCTGTGGACGCACTGCCCACGTTAAGACATAGTCGCGAGAAGCATAGCAGTACAGGTGCGACCGACTTTTGTACTCTTCTCTGGAGACGGTGCAAAGACGGCAACCGACTGGTCAGCCCTTACGTTATCGCCTCAACATGTATAGGGTGAGCGAGATCCTTCATTGGTTGTCACGGTACATCTCCCAGAGATTGCAGTTCAGACTAACACGAAAGTGCATTGTACTCAAATCTTATCTCAAGAGCCTTCGGGTTTATTCATTGCGTAGACGTGGCCCGCTGTGCTCTGTAGTTCAATCCGTGTAGAATTACGAGAAACTCCATGTTCCGTGACCGTTTCCCTAACCCGGCTATTCTTCGCGGCGCCGTTCATCCTTGTTGTCCCGCAGCTCGTTTACTTGAGTATTTCCTGCGAGCCCAAAGAAGTCCATGAGAAATTCATAAAAAAACTGAGACTACTGGCGGATCAAGCACCCATAGCTGGCGATAGCGTTACGGCGCTATACGTTTACGGGGATATTGAATGTATCTTCGTAGTCAATCCAAAACACCGACGATCACGGTCCTAATTATGACATCTTTGTTCTCCTAGCAGTGATTTCTATGAGCTTTTAGGAAGAGGAATCGATTGAAAGCAAGCAAGTGGCTATCCACCAAAAACCGGAATGGCGCAACTATTTAGCACTCACGGCTGTGAAACAGTCGACACACACTCTTTAGTCAGCCATGAGTTTCTTCTCTTGCAGAGATCTCAGCCGAAATTGCGCTCGCCAAAATATACAGACTGACGACCAAAAGAGTCTTGGTGTTGTTAGCCAGTTAGTTAGAGTCTTATAGACAAGGTTGCTGGTAACCCTTCTAACGTCCGGCGCTTGGAGGACTATCAATCACGTCGTATCCTATGCGTTTTCACGTACGACGGCCTATTAAAATCCTGTTTAGGTTTGTAGGTGACTGGCGACCTCTGATTCCCAGCTCTATAAAGGTTGGGTCAACGTTCGCAGTCATCTAGTCTGGAGTAAAGCATCCATTTGCTATGAAAAGCCTCCTATAGTGTACCCCACTTGCCTCGGTTAGACCTCTCCAACAATTTCCTACGCTCGCACAAGAGAGAAGCATCGTCATTGAGAGTGCGCGCGTAGGGCGTCCGACGCTCGGCTGTATAGAACGACTCCGTTGCCGCTAACTAGCGCTTACTGGGCAATACATACCATTGCCCATTTGCCGCGTTCGCACTCCTCAATGCTTTAGTTACCTGAGTCATCATACAAAGCTTCCTAAAAAACCGTCTTGAAGCCAAAGCTAAGGATATACTCCTCCATTTAGTTACTTTTAAATCGGCTACGTGTTCCAAGTATATGTCAAACGTCGGATATGCGTCACGCATTACAACCCAGGCTAGCCGAGTATCCTGATGAGCCTGCCTGAGTGCACGGCCCTTGTCACGGTATGCCCTATCCTGAAGATATGACAGCATACCGGTGACTATATTGGTCGCGCGGTGGCAAAAGGATGCTCCAGCAGTCGGTCCATGATTTTATCGACCGCTAGGTGGGTGGCCTGGAGTCAACATCCAATAGAAATTCGGCCTCGCATGCCACTGGTCTCAATTCTTCCATCCCCACTTAACTCCGTTAAACATTGTCTGTAGGCGCCAGGCCCAGCTCGAGAGTCGGTGTCATTAACGAAGGAAGGTTCTTGCTCTAATCGGCTCCCAGGGACCAAAGTCCTGGATG +2 GCATTCCGTAATGGGTGACGGCGTAGCTAGCCTACTACAAGGACCGTTTCTAGGGGCCGGGAAAAGGGTGACGTCCCTCGCTCTAGACTGTGTGTAAAGGGTCATGTCATTAGCCTAATCTATGTATGCACCAAAACGAAGCGTGGGGTGTTTGCGGGTCTCAGTGGGTCCACAAGACCCCTTTTGGCAGAGTCAAGTAGGGTGTGCAGTGCACTCACTGTTATCTAGGGAGCGGGAACCCCTTATGATGTCGTGCTTTAAGTTCACGATCGAAACTGCTGCAGTCTCATCACAAACCCTCCCCTGTGCTGAACAGGCAGACTTGTGTTCCATCACCCAACGCTGCGAACCCACGCAGTTGGTATGATAGCTCCATCAACAATTAAAATTGGTGACGTCGTTTCCGGCCGAAATTAGTGCATGTAGCCGTGGATCGCAAAACGCCGAGCTACCACGCCGCGACCCGCGAAACGATGCTTCAGATGAGAGGGTTGTTCGCCCGCTCTCCGACCGAGCAGAGCGTACAGGCTTTATTTGACGACTAAGTGTGATGTTTGCGGTGTTGTCCCACCAAGTATACCCTACGTGAGAGGACCCAAAGGCCAGAATTACTGAGTGATTCACAGTATAGGAGCGCTTGTCTCAGTCGAAGGTGGACATAATGGTACTCGGTACCTACCGCCTAGAGTGAACAAACCCGGGGTCCGATTGTAACGGGCCGATTATAGATTGCAAGGGATCTATCCTAGCCAGTCGTTTAGCCATGAGGGGACAACGCCTGGACATACATACCGCCCGTGTACACGGATCAGTACGCCGATAGACTTTCTAATCCGGAACTAGCTAGGGTAAGGTGGTGCTATTGACGATTCCGTCCTTTGGTGTTCAGAGCTCATTTAACTGGACAGGGCTATAATTGATTCAGTGTGCGTCTAGAGTTCGGACCCCCCCATAGCCGGGGCAATCCTTCTATTCGCTAATAAACACTTCATGTCAGCTTACCGCGAATAGACTGGACTCACCGGCCGTTTGTACGGTTCGTCGCTTACAGGCCTGCTTCGTGCTCGTGGCACTGTAACAAGATCCTAGAAACTTCAAAGGTCTCACGAAAGACACACTGATCTCTGCCTCTGTGCTGGCGCGTCACCAGAACTAACAATAATGACCACGTTATATATTGAATAGACACGACTCTTACCAGCCATTGACTCGTGCAAGAACCTGCGTGCTGAGTTTGCAAATGAAGATCTCAAAATGCGAGTTCCTAAGGTGTGTATGACGAAAAGAACTCCCTTAATGCAATTGATCCAAGGAGTGTATGTCGCCGTAGTTGTCGAATGTGTCGCCCTGCTAGCGCAGAACTAACCTTGGACCGCTCTGCTCGACGCACGTTTTCGATACAGGGCGGATCGTAGCTGACGCTCTTATCTTGCTCACGTGAGATCATTGGGTCACTGAGAACGATCCCACGGACACTCTAAGAGGAAAGCAAGGCTAAGGGGCTGTATTGAAGGGCAGGCTCAGTGAATGGGCGGCTGGAGACCGTTCACTATCTCCATGAGAAAAATTCCCGGGATCCTGGCCCTAGTCGGTGAATTATTTTCGTTACCGGGCAGGTTTCGTGCAGCCGTGCAGCAAAAATTGTCTTCGGCGATCAGCTATACGGGTTAGCCGTCCGTGCATTGACACATTGAACTTCGAGCTCACTTATATCATGCACTGAAAGCCTCTTCGAGCAAAAAGCTTGGCAAGCACTTCATACATTCGCACATCTCCACCTACTCGGTTCTCAAACCAGTACCCACCTTGCCACAGACGTACTTGAATGGAGCCCCCATAGAAAGGAAGGTTGGAGGGTCAAGTAAATGTCATGTTGGTCAAAGGACGGGGCAAATGATTGTGGCGGTCTCGCAGCGAGCCTTGCCTAATTGGGCACGCGTACTTGAACAGCTGATTGCGCGCCCAGGCGGCGGGAACATCATTACGCCTTTGGCCTGATCTTCTATCGGGCTACTACGAGACTGGGAAGTCATATCACAGCGATTGAGATCTAAGAGGCAAAGTTTTGGTGGTTAATCAGAGAGGGAAGAACAGATAATATGTTCCCGTAGTCTACAGGGTGTCAGTAAGACTATAGGTATGTACACGGTATGGGTACGGACTTCCCGCTCAAGATCAGGATCCTTTCCTAGGGGTGCAATGCCCGGCTAGGTCGGTACAAGGCCAATCAAGGGTCTGATGAAATTGCGTCTACACCGGTGTGTAGTTACGAAATCTCTAGGATCAGAGCCGAACGAGCTACAAACTAGGTTTTTGTTATCACAAACACTACCCCAAGCTTTCAGCAGGTTTTGAAGGTGTCTGTAACCGTATGGAGGCGCGGGGAGGCAATCTTCCCTCTCTTTGGCAAACGCAATACCAGGGAAAGCCAGCATAACGTAACAAGTTTTCCCACCAGCAGACTTGCCTACCAACTCGAAGGTCAGTCTTAGGTTACGCGCCTAGCATGGTTGTTATGTATCGTAGCGGCTGATTCTAATAGACACGATTAGTGTTGCCCGGTTTCGGCATAACGCCGCACGGATCGAGGAACTTCCGGATCCTCCGCGGTCGCTATGCTCACGCCGACGTCCGGTCGAAAAAAGTAGGGTTTGCGGATATCTTCGTCTGCGTACTATCACCTTGGCTATTGTATGCGTACAGCGGACGTCAAGCCGTTACCCCACGATGTTACTAGCCCATTGGGCGTCGTCACTATTTCATATCCTTAAGTTACTAACCGATAGCGTGAATCCGTCTAGCGCGAGTTCCGCACACTATAAGCCTGACTTGACGGCTGTGCCAAGCACGACCGGGATAAGGCGAGCTAGAATCAGGGGCCACGGTTAGCCAGCTTGTACCGAGGACATGCGGGTCGACCTTCCAGCTAGTCGTGTGCCAACGCCGCTGACTCGAAGACCGCGACCCAGGGGCTAATTGTACCAAATCGTAGGCTATCATACTGTATCTCCCTTCCTCCCAGCGAGCCGGCCCGAGTATTTACCGACGCCGTAATGAATCGCCAGGCGCGACCTTGAATAGGCCTGCATTACTACGCAGATGCTCACTGAAACTCTCACATATACTCGTCTCCTAGCTGAGCTATTAGGGCAGAGCGGAGTGTCCCCCTACAAAGTGGCCCCTTCTGAGGACAAGCCTCGGAATGCCCTTGATGCACGAATCCCGTTAATTCACCGAGGAGTTAGGTGATCCTTTTACAGGTGCTTACTGTGCGGTTGGGGTGTGCAACCTGAAAAGAGCGTCGCACATTAACGCGATAAGTACCCACGCTGAAGGTACTGATAGTTGACTTTCTGTAGAATCTAGATTTAGACGATCGCGTCAACGTCCACCGGTTTGTATGGAGTGTCAGCTACGTTGATCAGTCTAGACATCTTGCATGATCCTCACGAAGGTTGCGTCCCTTATATCTGAGCCGAACCTGTAAACATCGGATATACGCCGGACTGACTCATGAGCGTTAAGGTGGCACTTGGTCCCCAGTTGGTAATGTATAGCGGCCTGGCTTGCATCAGATCACAGCAAGGTTGCTTAGCCTCAGGGGTGCTGTACTACCTACCTCAGGTCAAAACAGCGCTTAATACGTCGGTGAGTCGGTAATAGCTTAGACTTAAGGTGCAGTCGCCAAACGCAAACGCTGTAGAGGCGTCGTGGGACTAGGTCTCTAGACTCGTGATCTTGGATTGCACCCTTCGCGATGGGCTCGCAGCGAGCGCGTATCGACTGCGAGCTGTATGGACGCCTATGATCTTGATCTGCATCGCGCGCTTTAGAGGGAATTCCGCACCTTATTGTTCTCGCCAGCGACCCTCTTCTATGGTAAGAAGGGTAGACTTTGTATGTTAATGAGCAGAAGATAGTCGGACAAGCGATATTACCAAGGAAGACGTACTCGTAGCCGAAAGGCTCGCGCGGGATCTCCCTAACATTCATAAGACAGAGTACAGGATTTCAATCCGTCTGGACATACGACCAAAGCCGATATGAACTGGCCAGAGAGAATCATTCCCCTCGTACTCCCTGCGGGCTTATCAATACCGATCTTAACAGCGCCATAACCAGGCAGGTAAGTATGGGTACAGTTCGATCCGCTTCCGGGGAAACCCCGGAGCAGGATCTGTACCCATCAAAGTCCTCGTTAGGCAGACGTGAAGGCATGGACGCTCGCTGTCAAGCATGGCGATGCGTCTGCTTTATCAGTTTGAGAGCCCCGCCCTTATAGACATATATTGCGTCGATTCGTCCTCGGCGTGAACCGACACTTGAGTCGTGGCTCATGCTGTCCGAGCCGAACTCGAATCCGCAACGAATCACAGAATTTCTTGAACGGGCACTACCCCCAAGCCAACTTGTGGGTAACAACAGGGTAGCGGTTGAGATAGTGGTGCCGGACAATGCATTTAGACGGAGGATCTGCCTTCTCGCGTTGCTATGTACGAACACGTGCGACGGAACAGATTGTACTGGCAGGTGTATTCGGTTGTGTGCAAAGCGTCGAAGTGGTCTTAGCTATGACACTTGCAACTAGCCGATACACAACAAGCCGGAGTGGTCAGACTCAAGTAATGAGCGTTGTTGCCCCGTTGCTTAGTTAAGCCCGGATTCCCGATGATTCACTCCTTACCTGCGCTCTGCACAGACCCGCGAGATTCCAGCTAGATCTCTAGGGTCGCAAATTCATCTCACATGATAAAGTATTCTATCAGCCTTGAGAGGTGCAAGTTTTAGTCTAGCGAGCGGAGCGGGCTCTGGCAATCTTTGAAGTTTCGCGTTACCATACAACGCGGTCGGGAGACAGGAGGTAGGGATGCGCCCTACTCGTCCCTCTGGTCATTCATTAATAACATAGACGTGTTTCTAGACAGGTCAAAGTAATTGACAACTTTCAAAGATAATCCACTGACACTTAATTCGTAGGCTTCACTGAGTTGGTATATCAACAGACAAGGGTATAGACCGTTAAGCCGCGGCCCGTTAAAGCCAATGATATCCCTCTCGTATCGTACTAGACCGCTAAGCTGACCCAGAATAGACATGTCTGTAAACCCTCTGGTAGGGGTAGCACTCGCATTTCCGTAATGGGCCTCTTGCGGTCCACTGAGGACCGCAGTCCTCGGTGTTGTTAATGCAGGTACTTTCAAGTCTAAAGTACGAGACAACACGTTGTGATAATATACCCTACTTTAACGTCTGGAACAAATGCTTAGGCGGTGACCTCGGACCCGGTCCTCTAGTGACTGCAGCGATTATTTCAAGCACCGTATTCTAACACCGAGGAGCAAGCTGAACATCGTAATAGGCCCAAGGGCGGGGCCCCGAAAGCTTTTAGTAGGATCTTGGTGGGGGCCGATCCTCACACCCTATAGCCTCCGGTAGCTAGCATCTGGATTTCGAAGATACCAATCTTCCTGAAAACTCAACGAAAACTGTAACGGTGGACCATGGCATCCGTACACGAAGGGCCCGTGCTTCCCGTGATGCTTTTGATGGCCAGTTACAGCTCTTTCCGGGCGAATTGTTACATGGCCACTACATCAACCCGCACTCAACGTCCCCATCACTCCCCGACCGGTTAAGCAGGTCTCCTAACTTTAAATGGACGGGCCCACATGCTTGAAAGGCGCTGTATGTTTGGCGTATCGTAGAGCAGAACTCTATGGAAAATAACGTGCCTTCACCTGAGGAGATCCAAAGGTACTACAACATAACCGAAAAGAATGGCAGGCAATTTCTCCTTCTCACGCATTATTAAGCCCAGGCTTGCTTACCATGGCGGTAGGGTCGTTTTTTGAATAAAGCGACCTACTGCCAAGTCTCTATGTCCACGCTGCTTATCGTGTCACTCCTAGTCGGGCACCTCACCCCTGTGATCCTAGTTAGCCAAATGCCAGGCCATTCTCTTGTCTTTGACTAGTGTGTGATATCACGTTATGAGTAAGGGATTATCGTAACTAATACCGTAACTATACAGTCAACAATGGGCTAGGCCATCCTGCCACCCCTTCATTCAAATTACTCCTTCTCGGCAAGCCTATTAGTGACACCAAGTTTTGCCTGCTCTTATCGGGACCGTCACTTCAGCAGTTGCTACGTGGGCGCTCCTCGATCAAGTTCAATTAAATGTTGGATTGGGTCCCTGCTGTTGGAATTTATTGGCACTCATTCGCAACCGTACTGATGGACTCGCCTCTTATACTAAATCCGGGCTGGGAATGCTCGGCACTCACTAGAGGTATTATACGGGACCGGGGCCCCACATTGTAGGGTATTCGCTTCGAGTTTTTCTCGTCGAGAACTAGACACTCCTAGCGATGTGACTTTAAATTGGACGGACAGAGACGTGACAGAAAAGGGTTCCTGAGACACTACGCAATAGAACCGTGATGTTTTCAACAGCTATCGTCGTTCCTGGGTCAGGCTCCCCGCGTCACCGGCTGATACCGTTACCTATCCTTCATTAGGCCAGGCATCGGATTTGGTGCAGAGCTGGGATCACTGCACCTCAAGTTTGAGTATACATTGTTTAGGAGTGCTTCCCGGTAATTACGTCAGAGGTAGGGCGAGAGGTAGCCAAATCTCCTATCTCTGGTTGAGGTTCTAGATAATGCCAGGTATACAAGAAGGAAAATGCCGCTTGCTCATGATATCGCTAAGGCGTAATGGTTCCCACTGTGACTATTTAATATCTGGGCTTGTGACACGATGACATATATGCCATGGGGAAATTAGGTATGAACCGCCAGCATAAGACATCACCGTTGCACCACGGCTTGAGAAATTCCTTACACAGATCCAGAGTGTAAAGGTGGCTGGAAAGCGGTCAAGAATGCGACACGACCTGGCTTGTAGTTTACATAGGTGTTCCACTGGTATGTTAGTTGCGTGACGGGTAAATAATCCGGTTGGATCGCAGATACTGCGTGGTGACTTTCCTGTTCGCCTGGTGATCTAGTGTGATTCATTCAAAAGCCTCGCTGGCGGCCTTGTCCTCGGTTCTGCTTTCTATCCGCACGTCTGTAATATTCGATTGCGCACTATGGAGCCCGCTCAGGAAAACAACTGGGCGTTCCCTGCGCCAGCGGCGATCGAAACCAGTCGACGTTCCCTTACCCGACCGGGCCCTACGTTATAGGCCGTAGATTCCACCGAAAGACCTGATATCCTGACCCCATTGGGACTATTGCGCCGGCCACATAGCTTGGGTAAGAGCTGTGCGACGTCTGATTAGATTCGAGTCCAAACGGGCACCCGGGAAGGCAAGCGTCGAAGCGGCCGTATTCCGTCGAATGAAACATTAGCACCGGGGGACGGATATTCTTGAGTTGCCGATGTAATCCAATGAAGGCCATTGCCAAATGTTAGGAGTATGCGAGGGCCTGGTTAAGAGGCCTTTTCCACCCGTGCCGGCTGTAAATACACGCAATTCAGGAGCGCCCAGTGCTCAGTACCCGCCGCAAACTATCGCAATCACCGTTGGCAGCCTACCGGTTGTCATGAAGACGAGGAGTTCCCTCGTTTTTTAGCGTACGCCGCATGAGCTCCATCAAGCCCAATGTGTTGGCCAAGTTTCCGCGGAACAGCGGTGGCGGTTGCAAAGGAGCGATGGTCCAACATAACATATGACTAATACTAGGGCGCAGTAGTTGCGAGTTTGGTTCACCGTGGTCATGGTATACGTAGCTACGTTGTGTGTCGGTACCGGGTATTCTTAGAGTGGTTCTGCCCTGGGGAAATTCTATATGTCAACGCCCATTATGCTCATGTTACGGAGTATATCCTATCTTGCGCGGGATAACTAACGATGAGGGGCGGTCTATTATTGGCGAACCTTAACACGCTTTCAGCGAACCCGGGCCCGATAAAGATTCAACCAGCTCTGAAGACGTTGACAAATGGCTTATGATAGTTGAATCCTAGGAATAGAAATGGCTTTAAAGCTTGTCATATTCTCCACGATCACCATCCGTCATCAAAAGATCCGCGACATCCCACCAGTAGTGCATCATAACTCCATTGGCGCCGTGCTCTCGCGTGAGCTCCGAACTCCGTTACTTTTTCGTTGAGATTATTAGGATTAGCCGGCGCAAGAACCTGATTGTCATGGACAAGAATCTCCATAGTCTTAGCCACGGCACAAGGGTCACTTCTGAGTGAAGGACGACCTAGAAAAACAGAACCGTCCAGGGGAGCAAACTTTTCTTGGCTCATACACAGCTGGCTCCCCCGGCAAAACTAGATTATCCCCGTAGGCGCCGGAGACCTGCCTACCAAATTAAGAGGAACATGGCATGTTCTCGCTTTCGCCGCCATATTTCCACAGTTTGGACTTTCAAACATGGCAGATTGGTGGTTGGACCCCACAGAGATGCGGGATGTATCTTTAATGGACGCTGCGGGTAGATCCCGTTGCTAAATGGAACCCGAGGGCTAATAACCTTTGTGATTCTATGGCTGAACGTGGGCGTCCGGCCGCAATAGGCGGGCTGAGTAATCTGCCCTATCCTGGGTTAAGCATTTTAAGGCGGCCGCAAGAGCATATATTACACTTATTTGCCACCGAATGCTAGCGGAAGCGCTTAATTTCGTGATTCTATTAATCTCTTTCGCATGCGAATGGTCATATTCCTGACTTCACGTGGCTTGGAAGAACCGGTAGATCCGCGCCGGACGCGGTCGGGTGCGCGTAGCTAGGTCCACCACTCGAATTCCGGATCTGTTTTTTCCCAGGATTACGCCGAGGGGGCTAGCTAACTCCGAAGTCGCAGAGTGTATTACAATAATCAAGGCCCTGGGTTCAGCTTGTCGGCCGACATCAACAGGCGTCGATCGATCAGACAGCGATATCATCCGTACCGCATATTAACTCCTCTTTGAGGCGCATAAGTGGTCAATCCTTGCCGGGAGAAATCTTGACGTCCGTGAACAGTTATCTATCAGACTCTGTATCTGATCCAGGATGCTTGATGTACAGAAGCACTCCCTGCTCAAGTAAAGCTAGGCCGCGACCTGCGGTTCGTAATCGAGGCAAGAATCTCGTTGGACCGTTCTTTCCTCCACCTCTCATCTATGCCTCAAATAACGGCTAGGGGGAGACCCTGACTACGCCTGCATGAATCATGTTCCAAGATTATTTTACTGTCCTCCGATGCAAGCAAGGCGAACGGTAGACTGTTGGGGTTGTGCGGCCGTTTGCACGCTTGGCGTCGGTACCTCGCGTGAAAGAGTGTAGACACTGTGATTTTTCTTAAGAAGGGACGCTGGAGCGGACCCGTCAGAAGATTGGGAAGCTCACACTAATTGCGGCTCAGCGGATTCCTTTGTGCAGCTTACCCATCCAACTAGCAGTCGCGAACGGAACACTAATGGGTAGTAGAAATGCGGTGCATCGCGGCGAGAATAGAACGAAGGTAATACTCCCATCACCGGTCTTTAAGTCTCCCAAAGCTGGCCAATTCTTAATCAGGAGTGATTGACCTTCGAGCCTTAGGTCTTAAAACGTCCAACATGTCCCGTTTGGTCCGGTGAGTGATTCCATCCCTCCCCCTGCTCGGGCTTTACTGCCCAGATTAACCTGCGAGTGCCAGCGAGTCTCCGAATTTTCTTAGCAATGGAGCTTTAGCAGTCGGGATGAACAGTTTCTATCAAACTTGGGGGACTCCCGGCCCCAACGCAGAGCGACCGCGCACAACTATGAAAGATCGAAGGTGTAGTGTTGCAGTCCATTTCGGCCATGAGACACTACGTAGACTATCAGTAGTAAGTGACTGTGCGGGCATAATAAGTCACTGACGGACCAAATCTTCTCTCATTATCGTTCTTTGTTATAACGGCTGTCACGTTCGTTGCTTTAGCCTCTCGGATCCTTTGACAATGCGAGAGTCACTCTAATTGCTAACTCAGGTCCACACGGGAAGCTGGGTGAGTCGACGCCAGTGAACCT +3 TTGACGTTTCGCATATCTCAGTTTAAGTGACCTCCTAGGGTCAGAGTTTCTCTGGGCGATGCAAATTTATTAGTAACAGACGTAATACAGCGGGTTTGAGCTGCTCGCCGCATTGTACACTGTGTTTGTCACACAAGCAGGGGGGGAACCTATCGTAGACCGCACGTAGATAAAAACCCAAATTGGAGAGATTCTTATCTGAGTCCCTGCTGACCTTTTTGTATATTTACAGAAGTAACTTCTTATTTTGATGAAGCGCATGTTAAGAACGTATAAAGCGGCATCCTGAGGACGTATGCTGATGACTGCACTATAGACAGTCCTAGGTATCACCTGCCCACGGAACAGTGTCAAACAAATCGGATGCTCTTTGTTTCACAAGTGTCTGTTGATCACGCGAATGGCACTTAGAACGAGAGCGGGGAGTGGCGTATCGCACTTAACCAGTCCACCCCGCCGAGCGGCCCTAAATAAGGGGGTTAGCGTAATATGTATCCCTAAGTAGTCTTCATTTGACGGCTATAGAATACTGGTTTGATTACCTGATTGTACCTTGGGGGAGCGGCGTGACTGACTGAATCTATCGGCGTCGTGATGACCGGCCACGATTGCATCTTGGAAGTGGTTAACGGTTCACTTATTGCGATGCAAGTCGCCCTCATAGGACTAGGGTGCCTTCTGCCCAAAAGCGGCAAAACTGGCGTCATTAAATCACGGACCTATCCTAGAGGAGAAACACGACATCCACGCTAGATCGGGAGCCCGCTGCTGCCCCCGCACAGTCGAATATGCCCGTCGTAATAAGTTATCACTCCCTGAACAGGCACTCCCCGTCGGGATCCTAAATGCCGGCGCATTATTGTGCCTAATTCGAGTCTTGCGCGTCTAAGGCGCATTTCCCTTCTGATTGATCGAATCCTTTCCGCCCGGTACCATCCTTGTATAGGCTCCAACGCCGAGGCAACCCTTCTGAACGATACTAAAGAATCAAGATAGGCTCTACGACTTTATTCGTGCACCACCGACCGAATGGTGAAACCGTCAGAATGTACGCTGGAACGAAAAGTTCACGACGTCACTACCGAAGTTTACCCTTACTAACGAACCAGTGGGGAATTAGCTTACCGTCCAGCGGACGTCGTGAAGCGGGCACTCAAAAATACCACCCCGAAGTGACTAACATGGACAGCATGATCAGAATGGACGAACAAGAGCGATTCACTGAGCGCTGTGCCCGGTAGGACGAGTCTTTAGTCGTGCGTTCTGAAATAAGGTGTAATTAGACGTAGCACAACTGGTTATTTAACCCCGTACGCCAACGACGACTAACCTGACCGGCGGGTCCGTCTGCCTTCTATTTTGCTCATGTACATCGTCAAGTTAGATACAATGTACTAAGTCATCTTAAAACTTAGAAGCCGAACTCATTGTGCTGACCTGAAGACGTTTGGACTTCACTTAACAGCACCTGGGCCCGGAGAGGTCAATGCGTTCTTAGAGAACTACACTTTCCAGCGGTATCCCTGTTACCCTAGCTGGATACTGAAATCTATGTACTTCTAAGAGAATTAGAGTATGATGAACGGGCCTGGTGTATCAGGTGATGACACGTGACAGCAAAGTGTAAGCGTTTATGTTTGATTCAATCCAGCGATAGACAAGAAGGAATCCGAACCTTTGCGTGAACCCACTTTCAAATGACAGATCATTACTTAAGAGATGAGGTGTTGGACGGATAATTGGAGTTGGGAACACATCCTACTTAGCATCAGCTCCTAAAAGATGCACGGTATAAGTTACCAGTTGGTGCCGCCGATCTTCTGAAATGGAGCGCCCTTATGAAGTAAGGATACTTTGTAGACTAAGTGCCTTGTCGGTTACTGACCGCGACAGATTCTTCTTGGTGTCGACAGCGAAAGCTGACCTTAACTGTCGACTGTAACGAGATGGGAGATTCCAAGCCCCGAGTGCGGGAAGCACTCACTGTCGCTAACCCGATCAATAATTTGACGAGTACTAAAATGGGAGGGTGCCGCCGCGTTACCTATACTTCCGAGACACAGCCTCCGAACTTAATAGCTCGGGTAGGTATTCATTGGTTGCATCCGTAGTAGTCTGGCCCTCGTGACGCCCAGAATTCTCACGTGGAGGTAACTTGGAAACGGTGACTCCGAAGCATAGTAATGCCTGAGGGATTCTAGAAAGTGGGGCGCGCGTGGATGGCTAATAAGATAACTACGAGAGTACCTTCCCAGGCGCCTGATGCCTACGGAAGTGATGATCTTGACTCACCCGTGTATAGTACCGCCATTCGCCACCTATCAGACCCCTCGCCTCGTTTGCATACAAGTACAAAGCCCAATGGGGCCGGTACATGGTCCCTGCTGGAGACACGGCCCAGGACCGCTATCTTGAATAAGTTAGAATCGCAAGATTAGGAACGAAGGCGTTACTCTATAAGAGATCATTCCTCGGAGAGAGTTAAAGTTACCGATCCGTCCAGACTTCGGTCACTTAGTACAGATCCTGGCGCATCGTGCAGACAGGCTGGCTGTTACATCACGCTGACAGCGTTCCGCAGAGATGGGTGGCCCGCTAACTGCTAAGAAGCGGCCAAGGGCACCCCGCTGCCCTATGGACAACGGTAACGAATTGAGGGCTATTTGTCTGCGTTGACACTGGTCGCCCCTTCGGTCCAAGAAGAGCAATTTATCATGCTATCACATATGGACCTGAGGACATTGCGCCTTTGAGCATTGTAACTCACGCAATCAACAAATCCTTTAGCGGTATCGACCTACTGTGCCTTCTCCAGCTTCTGAGCCCTGGATAAATAATCGACGTATGCGGCAACCAATCGGGGTGTCTATGGTCAAAGCTCATGCATAAGCACGAAGATAAGTGGCAATAAGGAGTCAGCCAACACGTTCGTCAATTTGCGCGGATTGACCTCCAATACCGCGACTCGTGGAACTAGTGACCTAGACTTCGAAATCTTACTCTCACGCGGCCCTCCTGAGACCTTCTCGATACGTATCAGGAAATATGAGTTCTCGACCATTTACAGTTGTAGGTCTATTCTCGTACTCTGCCCCGCTGGTTCTAGTCGGAATAGAGAGACGTGCGCGAGCCCTACCGCGCCTCAGCGTCCACTCGGAAGCTTCCAGGAACAAGCCAGCCTTTCTTCATGAGAGGAATCCGATAGGTACATTCCCAGGTACGCTTTTCTATCAAAGAACGCCTTGGACATACTGTGCGCGATCCATCTCACACATTGAGCTCGTGGGGGGCAACTCCCTGCTAGTATACACTCCCGACACTCGTGCATGTGTCACGTAAGCGGGTTTGGGACTGTGATGGACCCTCGTAGCTTCAGACCGAGAGCTTCGGGCAGGCTATTGTACGCTCTCGTCCTGATAGCTCTCAATGGCTCAAGTCCCGACTCCCCGCCGCGGGTCGGGTGCGATTCCGTGGAAACAACGGCTAAGGCGTCCGGTGATGACCGACCAGAACAGGCCCGCTGATATCCGAATTTACTCCCGATTTGTTCCGTCTGATAGTCAGGGCGGCATGAGATTCGGTCGGTATTCAGGAAAACCAAGCCTACGATATTCGGTTCCATAGATCTCAACGCCATTCTGTACTTGAGGCATGTCGACGAACGTTATACGGTTAGTGGACTCTGCCGCCAAAGTTACAAGAGCCTGCTAAGTAGGATATCGCCTTAAACCTTCGATACATCCAAGCCTGGTAGTCGAAGGAGTACTAGTGTTCTTGGTTAATTTGCGATTCCGAAGGACCGTTAGAATTATCGTCGTTATTTCGTCGGACAGTTGGACTTTCACACTTCTACAGCACTGCTCGCCCCCCATTTGTACGCGCCGAATTTTAGTAGTGTAGGCCCGTCTGGCTTATTTCGTTTGGTAACTCGACAAGGACAGTGACCACTAACTCTATAGTGCGCTTATCCCCGCGACGATCGGAACTAAATAGGTTCGCGACCATTTATTTGCGGTGTCACAGGAGCAAGGTCAGATGAATGTATGATATGACTAGGCGGTTGGACACGTCGGAGTCTTGCAACGAGCGGCTCGACCGCGACCTTATCTGACCCCTAATTATGTAGTAGTGCCCGATGAAATAGCGATATGCTTGCCGCGATTCACTTCGGGAGAGGCTACGTCCTTCAAACCACGCGTCAGGCGCACGTACGAAACCCCTAGCTCCTTCCCCTTAATAAGGTAGGGCCTATTTGAGTGGACTGTGACCAGGGCCCAAACAAGCATGGATGGCTCCTATTTGCTATCGTGTAGAACTACCTGACTAAAAGTCCCGATGATTGTACCGATTTTGGAGTAGTCGGCAAGAGACTCCTAGCTCCAAAGGTTTCACTCGCGCGCGAGTCACACTTACGCTTGAATGCGTGGAAGTTGTTAGAACCCTCGTCCGGGATCAAGCGCCATATGAGGCCGTTGTCGAACTAGGTTGGCTTTAGATGAAACCCGGTAACCTGGTGGATGGTGCCAGTACTACAGCGCTGACAAGGTTAATGCATCAATGCCCTGTTCTTTGAGACCTTTCCCACAGGGCTCTATCCTCTTAGACCGGGCAATATCACGCCCAAGGGGTTGGCTGGCGACACGTTAGTAAGGTATCTTCGGTTACTTGTAGTTCAAATTGATCCGTCCGTCTGTACCAAAACAACGAAATGCTCAGTACTTACACATTGCCGCGTGTTAATCTACCCGGAATAGACACGCGGGTTCAATTCTTGCGTGACAGTATATTTTGAGCACCTTCTACGATCATGGGTAATCTTGGGAGTATCGCATTACCGCTGAACTCTATTGGAAGGGAGGCCGTTGCTATGTGTGGGCCAGGCCGCGCATGTCTGCATTAGAAAACGGTAAAGCGAGCAGACTTAGGATTATGTATTGTAGTACCTACAAAAGCTGTGCACGGAACGGTCTTATGATTGTTGGATCGATGTGATTAGCCAACAAACGTGTGCATATCAGGCTAGTCCCAGAGCCGATAAGACTGACCATAAGAGAGTATTTGCGTCCAAGCTTGGCAAAAGGATGCATCTAGGTTTGGTTGCAACCGACCCCGGAACGGGCCTGCGGCCCACGCGTCGCATGGAAGAGGTGTGATGGATCCAGCCGACCTGTCGATGCCTTGAGCCCATAAGGATAGTTGGTTCACAAAGTGATAGGCTTCCCGACATAAAGATATTATACCACACAACAGTCGTGATGCACCCGATAAGCTCGACTCCCGGTGTCGCGTCTCATTAATTTCGGAGTGGAATTAACTCGCGACGGATTAGCAGCCCGGTCGCGCCACTCTCGAGTATTAGGACTGCAGATAACGGCGCAGAAGGGTTAGTTCCGATCTGCGTATAGCCTGATGTGCCCCATATATGATTGTAGGAGTCGATCGACCAGGTGACGTTCATAGCATCGGCAGATTGGTATCCCCTTACTTTCTAACGTTGTGAAATGTCCCATAACCGATATCGGCCATCCCTTCGTATAGTACAATTGGTGTTCCGTTAATCTTCGGACTAATTGTTTTCTGAACTGGCCGTCAAGGCGGAAGGATTGAACCGAGACACAGCCACCCAAAGCAGATGTCCATGGGGCCTGCCTCCTATAAGAGATATGCAAATGTTCGCAGCACGCGCGGCTTTTTCGGGCATGCATTACGCATAAGTCCCGAATTTATATTGGGCCGTGTCGGGAGCAGAGACACGCTACACTAGGCACGCTCAGGATTAGTGAGGGAACCAGAATTTCGTGGCCTTTGCAGAGCGGACAACCCCTGTTCTCTCCAGCAGACGCGATCCGGTCTTTAAGCGTTCGTAGGATACCGTCATATGTCCTCTCTGGCTACTGCGGGAAGACGAATTTTTCGCCATGATCTTGTTCTGAGATCTAATAAATGGGCATCAGTGTTGCAGTTCAATCGTTTCTATTCGAGGCCCCGTCGGGTTCTATTGTCTGTCCTATCAATTCGCCGTTATAGATAGCTTACTAGGGACCGGAACGTCACAACTCCTGTTCTCCAGAAGTAGCGCCGCCTGGCAGGTCAAGTAGTTCCCTTCTTGATTGTGGTCGGTAAATGGGGCCTCAATCAGATCACCTGTTACATGTGCGTACCTGGGTGCATTGAATGATATTTAATCTTGTACTAACTCCCGTTGGGAAGGAGTAGGACACTTAGCAATCTTCACTGCAGGCACGGTATAATTTCATACATGACGGCTAGCTAGTCCCCGCTCTCTACAGAGGTACTCTGAGTAATCGCGGCCATACAGTTATAAGCACGCGGTAAGTAAATCTTCCCTCCTACCCGATTCGTTCCTAGCAACCAGCAGTGTCGGTGCCCGATTTTACAAGGGGTAGCAGATACACCCCGCGTCCGTGCTAATTAGACGCATGCAAGTCCCAGCAGCTCACGGCCTACCTTAACCTCTCTGCGAACGTTATCTTAAAATTAACTGTCGTTTTGTGAGTTCGCAGAACCACATGATGTTTGCCAAACTTATCATACCCTATCATGAGTACTGCATATATCTTAAGTTAGAATGAGGATTCGCAATTAATTCCAGCGATGATCACCCGTGAAGGAAACTTTCTTTACCGGGCACACGATATCCAGACGACTTAGCAGGGTCACTTGAAAGAACGGGCTGATCAAGCTATCGGTATGGATTAATGGATGTGTGCGTAACTTCTTACTACTTATGGAGTGGGAGTATTAATATGTTATCTTGCCGCCGGGCGATCGGCAGCTGCGCCGCACCAACCTCCGCATTGCTGTGTCAGTTGTAAAGATACTAACACGCTACGTTAAACCTATCTTACCGTATAGCCTATCGCGTTCGGGACATGCCCTGTCTCGGATTGCATAAAGTTATGCCACAGAGATCCGGTGCGTTATTACGATAGAGCAATCAGACCAGGAGCGGGTGCGCCCATGTTACACATGTTCCCTTCCGGTGATCCAAACTGACACGTTCAGCAACGACCATAGAGTCGAAGATATCCGTATACCCTACAGTTCGGATGTGATAGTGTTAGGATGACTGGGAAAGCGGTCATGGACGGTGAGGCACCCAAAGATCACTCCTGTGGCGGGGATGTTGTTAAGTCGTAACGCCCTATCTCGGTCAACCCTTAGACAACATCCTGGGGCTCCAACGCTGACGGCAGCTCTCCCGCTCACCCCAAGTCCAGTGCGTACTAATCACAGCCTTACTAAGCTTCAAGGGCCCCGTGATTCGGGGCGATCCGTAACCGCCACTGGCAACTCAGGGCTTGACGGCGGAACGGCTCCCCTAGAGGTCGATGGAAAAGCGGGGTACCAATAACCAGATGCCTCCGATCGCGGACCGTCACGGCCATAGGCAAAAGCTAGGAAAGGAGCAGATCCCTGGAACGGGCCAAATATGATAGGGGAACGTAGGAGATTGTGTGGGCTGATGTCTAGCATTGGCGCAATCCGATACCCGAACGTTATCTGACTCCTGTCGGCGTTCACAGGTGTGCAAGACCGACCTCAAGTCCCGCATAACTCAGGGTGGGCGCCATAGGCGTCACTCACGCAACTGTAGGGGCCCGTATCCTCGGGTCGTGATTTGGCTGGCCAACTCTGAAGATGGCCGATCCACACGTATTAATAATAGTAAGGCATACTTTTTAAAAGCGTTGCGCACTTTGTCTGTTGCGGAACAAGAGATGTCGTGTGCAGATTATGACTATAAGCCATGATATCGATCCAAATGCTGTATTTAATTGGACGCATCATGCTTAGCCCTGTTTATTTCTATATAATGTCACGCGCCCTCTCCCGACCCATCAATGGACTTCATGCCTTGTATAAAGGACGGCCATCTCTATATGTCCCAGGCCATTTTGGAGCCCTTTCAGCGCCCCCGGCTTTAGGCAAGTGGTTCTGACAATCGGATGTGTCCAAAAACAGTATTCTGTAATACTCTCACTCAATAAGCAATACCCATCACCGTTTAGTTTCAGCGCGCTCTCAGGTGAAAGGACCGCCGGAAAGTGTATCTTAATGATACCTAGACTGAGCAGCCAGCTTCGTTATTTTGCAAAGGACAACAAAATTGCACATCGGCCTACGGGAGACAAGGTGTTAATGGAATAGGGGTACGGAATTCTCGCGGGGACAACCGTCAATCAAAATGCGACAGTGCCGTCTCAGACATGGGGGATACAATGCAGTTGGTTAATGCCAAGTATCCCTTCTATGGCTTCCCTGTCTTAACCCGTTCATACTATAAGGAGACGATGTTTTTATGACATAATGAAGATGTGTATTCCCGTGCCGCAAAAGTTCAAGGTCGCTAATAATGGCGGATATTCTTAACCGGACCCAATCATGCAAGTGCGGTCCGCTGACTCCGGATTACTCCTTACGGGGGGCCTAAAATATACAGAGCCGCGTACGGTTGTTTGGAAGCTGAACTTACTCGGTGTCTTATGACTGCAACTCGAAGTAAGCTTCTAATAGACCGGTCGAGTAATGTCTAGTCTCGTACGGTAATCTCAAATAGGTTGCCAAAAAAGAAATGTTAACTACTGCGTTCGAATCGATGATCTCGATTGTGACTCTACCAAGGGTTCTTGTATGATGCTTTGTATAGCCGATGACAGATTCCTCACGTATGCGTAAGTGGAACAGTATTTGGCGTACGCGGGCGAGCGGTAAGGGGCTCGTGGACGAGCGAATAATTTTCTATATGGGAACTCTCCTTGCATGCCGGTCCAATAGGGCGAACCCGACAGATTCTGTGGCAAATTGTGGTATAAAGTTCCAGCTAAGGGTTCCCCGGGGCCGTATCGTAAGAATAGGTGTGCACAGAGTCCAAGGCCCCCTGCCCGTTATCGTATGATCCATTAGCTCTGGGCACAAGTTGGCTATCATCTGGAACACGCGTAAAGTGGACGTTCAGACATTCGTACGTCTCAATGCGTCAATGGTAGAGACCCGGCGTGCAGAGGTGTCATACGCCATGCATCGGCGAGGCGGATCGGCGTTAAGGGGTGCGTCGGAGACGCGCAAAACTGATTGGACTTTAAGGACGTTCACCGCCCCGCTATTCCGCGAACACTGTTCGTCGCGATCTGCTTAAGTTGCGGAGAAGAGGAAGTAACACAGAGTAACGTACATCGCTCGGATTCAGGGACGGTATTTGATTAAAAGGGCGGTATGCGCATAGTTCAAAAAGTTGATAGCTACGTTGCATCGTCGACCATTTGCAAGACGTACGCTTTCCATAAGCTGGGGTTTGGTAGGATTCTCGTTGTACGGCTCCCCGACTATGGATAGGGGGAAAGAATTCGGCTCCTCTGTGCCTCTTACACTTAGGACCAGGCATCTCCTACCGCCGAACATTTTGACGCAGTATACCGGTGCGCTACGGCGAGAAGACAACGACGCTTAATAACGGCTGCCCGCTCTTTTGTAATTGGGAGGGGATCATCGTCCTAGTAACGAGTCAGGTCGACGTTAAAACAAGGCCGCTAATATGTAGGCGAGTACCAAATGTGCGGTAGAATGAAGCCAGATCAACTTCTGCCTGTGTTTTCATTCCAATGTTTTACCGAGGGTCCGAGAACGTATACCTGAGTCGCTAGTAATACGGCTCTTCCATTATTTCTGGTCGGTACCGATCGCGCCTGGGCTGGTCGTAGCGCCAGATTTAACACTGTGAGCGCGCCCTCGTACATCTAAACGAAAGCCGCCCAGGACCATTTGGGACATGAGGCAGTCCTTGGACAGTAAGTACCTTCCTGCTGGCTGACTATCCTTCTCATCTTCCATACTAACTCTACGGGGCCCAAATATCTGTTAACGAACTACTGTGAGCCTGACGACTCCTGTTAGTATGATTGCACGTGCTAGGTGCAAGCGAACCTACCGCTCTGCGAGACTCAGGCTAGAGTGTGCGTGAGTCTTCAGGCAGGTATCA +4 ATCCATGGGATCAAGCAAGTGATATCCCCCGTATCATTCATCCAGGCTTCCGTTTTAGACCTTCGATGCTCCCTAGGATGGTATAACGCTAAGTGCACGCCAATACTTGTGTCCTAGCGCTCGGACATTGTGGGTACTCTAATCAACATCTAACAAAGGGCCTCAGATGCGCCCTGTTTCCGTACCGACTCAGCGTGTCGATGTGGCAACCTCTACGGGTTGCCATCCTTGGAACCCCAACGCACGGTAATAGTTGATGCGAACCCCACGGTTCGCACAAGAAGAGCATGATCTCCAGCACCGCATCTAGGTCTAGATAGCCCCTTCCTTTTAGTGAACCGTACCCACCGACGAAGTCCCGTTCCCTATTAACGCAAAGGAGCTCGCAGTTATAGCTAAGGTCCGTAGCACGAGCCGTGATGCAGCTTGTCAGCCATAGGAGGAGGGGCAGCAGGTAGTAGTAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCATCGTCACATTGACCAGGACTAGATCCGCCCGGATGGCAACTAACGAGACTCAAGCTGATTGCACGGTCCTGGTCAGCATTCACTGTACCTCCAATTTCGCGACACAGGAGTACGAATCCCAACTGCGGTACACGTGCGTATTGCAGCCAACGTAAGCCGGATATCTTGCAGCACATATGGTGCGATTTTCCCCGAATGAATGTTGGTATAAACCCCAAACTTCGGGCGGATGAGCTCATCGGACGTTCAACGATCTTTGCGACAAAATCACGCGGACATGGGTATTATGTGGCCCTCGGTCCGGCTATTTCTGCTACTTAAAGCTATCCCGTACCCTCTTTTGAAGGTGACAAATGCGGCGGGATAGGGCCTTACACGAGACCGAACCCCTAATGCGTACATTAATGTCAGATTCGCTAGCTCCACGATGCTAAGACGGTGGGCACTCAAACTGAGTGCGAAGGTTACTGACACCGGGACCAAGTTCACAGTTCCGCCCCATGTTAGACTGTTCTGTGAACTTTGTCTGGTATTTGTGGAACCGAGGATATCTAAGCCGTTTTTTAATAAAACATCAAGAAAACCGTCCCAGGATGGGCACAGAACGAGTCGAGCGACTTGCCGCCCGTTTACTTGTAATGTATCAAGCCTCATTTCTGCACATCATTTCAAAGTTTATGAGGAAGCCCCAGACGGTCGTGAGTTAGTACCCATTCAGTCTACAAAGTCGAGAGCACATGACTATACCCAAACACGTTTAGTTAAGAACGTTTGTACGACTGTTTGCTGTATATGTGACAACTGTCGCCTCAGGTTGTACACACGTTGCCCACACCATCATGACCTTACCTCCCGCTTTTTGAACGAAGTCGCGCCACCTGTCGCAAGTAGTTGTGCGAGGGAGTTCTATTTCCGGGATGATCATGAGGTAACCATCATGCAGCTAAGAAGTCAACACCGCGGGAGAGACAGAATTGTTATAAGGCGGCGAGGAACCCTTTCGGGGTTTCCCACTGTTATGTCTATGCATAGGATTGCTCGGCACGCCGCGTATTCATAACCGTCCGTGAATAATGTCGAGGATATGTCAGCCACCCCGACCGGAGACGTTACGCTTAATCTAGCTAAAGTCTAGCTCAACCGTAAGTACTCCATGGGTTCTACGTGATTCATTTATTTCAACAGTCTGGCCCTTGAGGGGACAACTCCGATAGTAGGGTATTAAGTCCCCGCCAGTATTTCCCTCTCTTAAGACAACTAGGCGGCCTTTCTCGAGCCATTGATGGGATAATTGTGCTAAAATGTAGCAGATATGTACGTTCATATACACCTCGGCCTCATGTCTGTGGGATACCAAAGAAGAATCTAACGAAGCCTTCGCCTAGGTAATTCAATAATTCTTCTAAATCCCCCTGCTTGGCTATCCTAACCTTACGCATGCGGCTGTAATCGATATGAAATATGATTTGGATGTGTTGGCGCGGACGGGGTCGTGGCGACTACGCCTCTTTGCGCTTCACCCTTGGAATTTGCCAAAATTTGCATATCTGCTTCACCCCTTGCTCCAGGGGCCCAGTCATAGCGTCCGTCGTTGACGGATATCCGAGTGATCTAATCGATGTTTGTGAACGTGAAAGGGTCCGCACCTAGGCCACCCTTCTAGTTTCTCAGCGTCTCAGCGTCTTGCCCCAGGGCGACGAAGGACAAAGTATACTCACGAGGCCCATATGGGTTAAATCTTGAGCATCTGCAAGTGCATATCTTATAAGTTGCTCACTAACTTCCACGTGAGAGTCTAACTAACGAAACCGATGCAGAGCGCTCTCCAGTACTTTCTGCCAGACTTCAGCGGAAGTCGGCAAAGTCACCCAATCCTTACACCGTTCTATCTCCAATGATCCTTTCCGACTTCTCCCCAATCTTTCGTAGGACAGGTTGCCGGACCAGGACAGTGGATTGAGACTCCCAGGGCGATTGTGCCGCGAGCGCCTCACAGCCCAGAGTGCACGAGTGATTATATCATGAGCACTTTATCGTCGGCCGATGTGGGGCCTCGGCTGATCACTACATGAATGGGCTGGCAGCTTGATGGCAGGGTACCATTGAGTCTCCATACCTAAGATTCTGATGTATTGGGGATTGGGATAGAGTAAGCGTCAGGAGACCGCAAGTGACTAAGTTGGATTAGCGTAGGTGCCACTGTTATGGTTACCGACTAGACCTATTTCGCTGAGTCAGGACCAGATATTGGTTCTGTTCCACTCAGGTTCAAAGATAGTGGCCGACACGAGGTTTGGCCCTCAGGCCTAGTATGTGATGTTTCCGTGTAGCGTGACGCTCGGCTTCATCGTACGTGTCCAGCTCGACAATTAGTCAGTCACGGGGAGATATGGGCCAAGCGGCATCGCAGCCAGCCCTAGGAGCGCCGCGTCCTTCAATCACTGAGGGCCCGTTAAGGGGGAAAGGGTACATATGAGAAAGGGGGAACGTAGTCAGGACCTCTCACTCCAGATGGCGGTGCTAGGGGATGAGACGTCTAGATTCAACTTCTTGCATTTCCTAAGTGGCTGTTCGAATGTTTTGGCATAGGAGTGAGAGCTGTGGACCCAGGTATGGTGTCGACCTCCGGCTTACTCCATAACGGAGCTTACTAGTGTCAGACTGGTAGCGATCGTGCATTATATCGCAAGACTATTCCGTTCCGCGCATGCGGAGACAAATTCCTGCTTGAACTAGGCGGACGCCTACTATTCGTAATCACGACGTAAACGCGCTCTGCCAATTGAGCTAATACTTGCGACTCTTACAGCACAAATAGTAGCTTGTGCCGATCGCTTTAGCTATGTCCATGATCACACCGATCTGGCACTGCAGAGCACCGCGAGGCCATCCCCTTCGTGAAAAAATTGATTCTATCGCGTAAATCGTCGTTGGGCGTCGCCGACGATGGCAGCAGTGCAATTGGAGGGCTGAGACCGATAGCGCTAGGAGTGCTCCACCTTTGACGGTCTCTCTCGTGCGGATAACTGTACCAGTTTCTGTGTACTGTTATCGTTAAGCCATCCGGACTGGTGTTTGTGGACTACGCTCCATTGATTCACCAAAGGGCAGGATTCTAACATCGGTTTTACTTAGAATGATCTAATGACTAACGAGATAGTCAATCACCTAGGGCGAGTTATATCAGCCCTATTCAAACTTGTCTATCACATACCCGTTGATCCACGGTGCCGTACGGCTCCACAACGCTCTCTGGCTACTCGAATAAGATTTCCAATATGCCTTGGGAATTCTAGAATATACCGAGCCTTAGTAACTGGAGACCCCACATGGTGACACGTCTAATATCCCTTCTATGTAGAAACCTTAAGAGTAGGTTGCTCTAGGAAGGAAGCTCTGCTGCGAGTGGTGATAGTTAGTGACCCACAGCTAGCTGCTGACGTAGAAACATACCCTACGCTCGCCGTTAACGTAGATGATTATCACATAGCACTCTGCGCGGACACTCAGAATAGGCTACGTAACTCATACTCTGACCCGGCGTCAAAAAATTTGTTTTGCTGTTTACTCGCAAGTCATTTGGCAAGCCCGAACCTTACTAAGCAAGCGTAAATAATGCGTCGGCGCAGTCTTCCTCCATTGCCAGACAACAAAATACCACACTAAACAGCCGTTTCATTATTATTGCTAGTTCACTAAAATGACGGGGGTAGAAAGCTTCCCCCGAAATAGTTGTTCTTCGGAATTCAAGGCAGCGTAACAGTCCACTGGAGCTCTGGCATCCCGTCACGCCTGTCTAAGCCCTATTTAGTTACGACCGGTTCACACTTAATGCAGGCAGCGGTCCTCGTCACGAGATGGTTGGCATCAGCCATTTTATGCGCCCTTAGTACGTATTATGTTTGGCTACCGTCTCCGATGATGGTAGCTCAGGCCGTGGAAAGGTGAAATTTTCGGGCTGCCCGGTTGAAATTACCCGTTCGCTTCGTTCGGATGACTCGCTCCCAAGATCAATGTCGTGGGAGGGGAATAGTGTTCCAAGTGATTGCCCACGCAAGTGACCCCGATTTTCAGTATGTAACAAGGTTGGCGGTTGCACGGGTGAACTTCCAACTCACCACCCGCTGGACGGCTCTAGCTTGTGCATTGCCTTAACGAGGACGTCCAGGCCCTGCCGATTCTAAGAAATGTCAACCAGTTTCATGGTCATGCTGTCGGAAGGGATTGACGCGCGAGCATCTCGTAGGTGAGACAGCTTTATGCGGCGACCAAATTGGTAGATAACCAGGCATTATAGGTGTAGCGGGCCCGTTTCAACACAAACTGACACAGAGGTCTCCATCCTGAGGCCCTGTGATGGTCAACCCTCAACCGTATGACTTGTGTGCTGCAGCCTCCTTCAAAAGTAAAATATAAGAAATGTCGGGGGGGTTATACCGTGTTATGCCGCCCAAATTCGTCGGACTCCGTGTCGTCGTCAGGTGTGCCTTCTTCGTTACGATACGATTAGAGGGAAAGGTTCCCCAACTCGGACGCTACACATGTGGCAAGGGCAACCCTCGTAATACTAACTCAGATTGGATCTTACTGACTTTTTTTCAGGTCAAGGCGTAGATGTGTGATGCACGGATACACGCACTTCTAACGGGGCGACCGATATCACCCACGTCGCGGAAACTCAGCACCCTTCCCGTTTATCTTCAGCTGTAGAATCCGGAGCCGACTCCCTACCACAGAAACGGCATCTAGTTACGGTTTGCGAGCTCGGTCCTTGTTCTGTGTATCACTTACGCGCGTGCAACGGCCGAATGTACATGCAAGGTGAAAGCCGCGACCAAAGCCGACTAAAGGCCAAAGATGTAAAGTCAGCCAGACCAAAGGCAGCAGTACTCATGCGGTGTCAGAAACTACGGTGGACTGGTGGATAATACGACAGGCAACGGCAATCACTCATGATAACGCCCTAAGCGACAGTAGGCATTCTACCGGCTCTGTTCCCGGGTGACCGTGACCATATCATTGATGCGCTGTCTTGTCTGTCACGAGTACCTACATGACTTATGAATTCCTGCGCTCGTGACATCCTCTTCCGTACGGGCGACATAAGAACAACGCAAACTAAAAAAGCCGTCAAGTAACGCTACGCACTGACACAACATGGTGATAGTGAGTTTTCCCTAGCTAACACGACTTCTAGCCTTGAGAGGCGGCGCCTGTACGAAAGTCTATAGCCTTTGCGTCGTACATCATGGGGGACGAGTTTTTATCACTTGAGCTGGTACATGGTTGAAAGACCTCGCTGCCCTTTAACCGCTGATACCACAGTCCCATGTCCACAAAGTAACGAAGCGGAATAATAGATCGTGTTGACATCGCCGAGTGGTAAGGAGACATATGCCCCCGTGTTAGATTGAATGCCTCTCCCTGCTCTCATTGGCTACCTCACTCCTAGGTCCACTTGGATTAGCCCCTGTTCGCCCGATCATAACTTTTGGGTTATTCATTTATTCTATCAAGGTATCACCCAGTCGATGCTGGCCTGCCTTCCGTTGCTTCTTGTAATGTCTTACTCTTGCTTCCGAATAGTCGCATTCGGCATTCCACTTGTCCTTTGAATGCACCGTCTCAATTGCATCCACTTATACTTCAGTGCTCCTTTACTGCGCGGTAGTCGAATTGACACTGTTCAGTTTAATAAACCATGCGAGAGCTCTATTAAAATAGACTTCTGGTATGAAGGGTATCGAGCAAACAAGTATACAATTCTACCATAGGATGTTCATGAGAACGTTGCAGGTGCTGCCAAGTCCGCTCCCGTTGTGTTGATACCTCTTCTTCACCTTTATTGGCATGGTATCCTAACGTTGGATGAAAGGACTCCAGGAGTCTATCCTATTTACAATTCAAAGAAGAATTCAGTTTGACGTATCCTCGTGAGACGTCCTGCATCGGGTGAAACGGTTTCACGCATTCGAAAGAGAAAGGCAGCGGTCTTGGGCGAACTGTAACGTCCAATAAGGCTAATTCTCGCTCTGCCTTTTATTACACTTGCTACTAAGCGCCGGTAGGCAGATCTATGAATAGTTATCCCAGACATAGACGTTCGCATGCGGGCAGTTGGCTCAAAAGAGGTGGTACCCTGGACTACAGGCAACCTATTATCTCTATTTTGGTCGAAGGTGATCCACGACGAAGGGTCAGTCGCCTTCTTTAATAGCTGATCGGCTTGGACTGGATGTGAGTACACCCATGCCTAGCGTTTTGCGACGCTTGCCGCCCGGTAAGAGATATGACCAAACAGTCGCCAACGAGCAACGGTGAGGGCAATCACTCAGTGGGGCAAATGCAGTGTGGACCGCACATGCCTGGCTCTCTAAGAACGATTTAGACCGAAGAGCGCAGGTCAAAGTAGATTGTACTCGGAACTAAGGTTCTCGGGTTGAAAATAGTAACTCCCTGACAAAAACTTGAATTTTGCTAGTCATTGGCGGCCCAAATCACATTTCGACCTGCAGACGTTTCCGGTGTTTGTACGGTACTTCCTGCGAATACGGGTTTTCGTCACGAAATTTTTACTTCAACATTTAACCGGGCATGAACCGATAGTTAACTCAATGTTTGGGTCTGATCCCGCCGGGCGAGGCGGCAGGCACTTCGTCGATATAGCATGGATCGCTATGATTGAATAAAGGAGTAGCCAGGGGCGACCACTGTGAACACCTCTACTGGTCCAATCCGTGTTTGGATTATTAGCAGTGGAGTAGTTCTGCGGTTCTCAACTTTCAGCCCGAATTGGGTTAAAAGGTATAGCGTATGCGCCATGATATCGATCAGAAATCATCCGGAGCAGGTACGGAGTTGAAGACGTAGCTAAGTTCCTTTTCGACAACGAGGTCCCCGAACTAGGAGAAACCCTCATACTATTACGGGGTGGTAAGCAGCTTGCGTAGCGGCCTGCCATCCCTGGTACTTACCGGAGAAGTCGGGTATAACCTCTATATCTTATGCGTTCACCAGTAAGCTTTGGGTCGAACTACAATCCTTCTACGGAACATTTGAAAAAGCAGCAGAGTTGAGTTCTAGCCTGGAACGCATCGACTCGCTGTCCGATCTCTCTTTTGATCAGTACTTGTATCTCGAGCTATATCAGACCGGTGACATGCGCCATATTTGAAGTAGTATTCCATGACGTTGGGGCTGCTTGGTCTGCGCCTACGTCTAGCACTATCACCGCCCTTCCTTGCAATTACCTAACCGCTTAGGGTAGACCTTGGGAACACGTCCGCCTCGTTGAGTACTTCGAGATCCGTGAATGGTCGGTGATCAAAAAATCGCCACAGAGGCGAAGATGTGTAAACAACCTGCAGTACCAAATGCTGAAGATAATTCACAGCCAAGCCAGACGTTGGGTCTCGCAACTGACGGCGTTTTGATTCCATTACTTGCATAAGGGAAGCGGACCGAAGACAGTCTCACGTGGGAACGGTATCGAGATCACTATAAGCAAGTAACCATCGTAAATCAAACGATCTATTAATAGCTCGAAACCAGATGTAAGATGGTCCTTGCCCTGCCGGAATGCAATCACGAATATTGCGGGTACACTGTATTAGTATCGATCATGCTCTTTTCCCGTGAAGTTACGTAAATTTGTATATCCGGGAGCACGAGCAGAATGACTTTTATCCGGCCCGGCGACTGTCCGGAATGAGTGAGTACAATACCAACTGACTGAGTAGTATTTCTGGGACTCGGTTACCACTTTTAAGTAGCCGCATGCGAGTCACGGAGGTCTCCGGTTCAGCAAGCCTGATGACCCTCGGGGAGGACGATTCAAGGCCATGGATGAGAGTGGGCCGGTATTGTCAAAGTTGCGTTATTGAACCCGAAGCGCAATCTTGGTCTGTTAATGTATTGACTAGCGTCGGGAAAATTCTTTAACGCAGAGGCGTCTTTGATTATCAAGCGCTTGAAACTTTTTGGCGGAGTGTCACCACTATTCTCGCGAAAGGGATTAGCAAATTGAATTCAATGAGGGCACTGTCCTCTTAGCTTTCACCCCAGCCCTCTGGGTTTCCGCGTGGGTGACTATGATGTTCTACTCCCGGTCCGTGTTGGTGCACTCATGGTGAAGCTCACGGTGCTTTGGTCACCCTAATTACATTGAATTATGCATTGAAGTATGCTTGTCCATCCCTTTTATGTCAAGACGGCGCTCCGGTCTACGGAATATTACTACACCTACTCTCTGTGCTTGTCAGACTAACAAAACGACTGTGGTACCATATTTGAGACGGAATCTTGAACAAAACCACAGTCACTCACTAGAAGGACCTGTGGCGCGATTGGCCCTTTGGTGCAGAGATTACTCGAGGTGATCTGTGGCGGGAACCGCGAGCTATCGTGGTTACTTATAGCCGGGCACGGATCGCGAACTCGGCAAGACAGATGTCGTAAACACGTAGTGTTTAAGGTCGTCTCCCGATGCCCAGATTTAACCCCTTTCGCCTGACACATCCTTGATGTCCGCCGTCCCGAGCAGCAGTGCACGATCTACGTTTACGCCTCGCGCTAGGGGAAGGCGGATGGACGATGGTTTCACGTCGTGTATTGAACTCTAATCACTCAGAATAAGCATCCGTAAATCCGCTGGTAGACACCTACGCTGAGGCCACATATATGCTGTCTGGCATCCAAACTATAGAAAGCAAGTGGGCTGCGCCGGTAGGAACTCTAGCAAATCGTCTAGAGTAATGTACGGTGTATGAGGCCACGGGCGATACCTGTACTAGAATACCTGGTGTCTGGCGCCGGGCGGGGCATGCACGGGAATGCCCATCTATTCCCTGATAGCCATGGGCCCATCCTAGAGTTTCATATATTCACCCTAGGCAGATTTTACCTTAAGCATAGTTCTACATTCTTAGACGAAACGTTGTTCTGGCGCGTTTCTCATACCATGTATGCTTTAGTTACACCCGGGGATGGGCATTGTATCCCATCCGGTCTTCTTGGCGCCTATGCGGCTTTGATCAAAAGAGGATTTGTCCACGCGTGCCGTTCCGTCATACGATATGGCAGGCCAGTGCCCAACGAACTAAACCATGGCATAATAATCGCCGAATCGGTGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATCAGCTCTGCTGAATACGACGTTTTTTGCTTCGAGTGTAAGACTCACGTATCGAACCTCCGTCCTTCACTTGTAATGATGGCAGGCCGATCAGTAACATCGAATCGTACTATATCAATTGCAATACTTGTAGATGTCCGGCCTCGACAAACCAACTGTAATCGGACCCTTTCTATTTGGAATAGAGTGCGCGGTTGTTCGTTGGTTTTCCTTTATTAATGGGAATTATCAGGGTAAGAGTGTACATTTACATTATAAAGCAATGGTGTCTGGTTGTAGTGTGTCCCACACATTCACTCGTCATTTTAAGCTTTACTCAGCCTGTTGTACAGA +5 CTTCACCCGGCCGCTCGCGGCTCGTATGTCCGACAGCGACTACACCATTGTCCCTAACTTTAAACTGTAGAGCGAATGTCAACGTGGCCGTACGTCCCCGGGTATGATCTGCGGCATCGTTAGCGTGAACATGCGGTAGCCAAAACCTTATTGACCAACCTTTGGGAGGAAACCGCCTTTACAAACGCATTCTCACGATACATTCGGGGCGTAACTGTCCGAAATGATCCAAAAGAATCGGGAATAATTAGTACCTAATCAATTCGTCGTCTCGTAAGAAGGTAGCCGTTGTGTGGCGTCACTGCTGGAGGGCACTATCCGCAAAGGTTAGGCCGGGTGTACCATAGCATCCGAATTCATCCGATTGCGGTATTCCAATAGCCGTTGAATTAGTGGAATAATTTGAGGCTGGGTCTGATCATCTGACTATTATGCCTGACTCCTGCGGGATTTGAGCATGAGCGCGCGATGAGAGTTTATAGGGAATACTGGAGCGTGCAAGCTGTGCCGGCCTCTACTTTATCAACTTGGGCTCAGGGCAGGTTGTGCCGATTGTGTGTTACGTTTAACTCTGTGATGGGTCCAGGCGTATACGCAATCCGTATACTATAAAGGTACAACAACTCCACGCGACTAGTGGGGATTAATATATGACACCGGTCTGCGGTAGGGGAACCCGGGGTCGTCGGTAAACCTCGCTCTCCTCTGCTGAAGAGACGCCTGGCCACATGTCCACCTCAGCGGGCCGTACAAAGTCTTACATCAGTGATCTGCGTGCTACAAGAGTGGTAAGTATGACCTCTCTGAGTCGATTGCACTGCACTGCCGCTTTGTATGAAGTACTAGCACTGTAATGGTTTGAACAGGGGCGGCTACGCACCCGCCATTACTAGCTACTCTTTCACGACTTGATGGCTAATCGGCAGGCCTACCGTCCGCGGACGAACTCGTATTCGGAGAAAGGTAAACGCTAAAGCCGTTCGGTGCCCTTCACAGTCATCGACGTCCAGTATACCCATTCTGACTGAGCGGGAGGTAAAGCCGTGAGTATATCCGTTCGAACGGCTATTCCAGCGTACATCTTCCCCGTAGTAGAACCGTTGCAGATGTACGTAACTTCCATTATTATATGGACTCTGAACTAGGTAACGGGTCCCGTAAAGGCCGAGATAGATCACTGAAAACCCAACATGTCGCTCGGGTCCCTAATCTAGCCTCGTAGAAAACGGGTACGGAAAGGACCTATCCTACACCAAAATCCGGCACATTCAACTCAGACTTATACGATTATTAGAGTAAGTGTCCATTTAGCTAATACCTCGCGGACCCCCGAAGATACGGCGATTGATGGTAACTCGGTTTCGCATTACATCCTTTGTCTATTCGACTAAGGGATCGTCCCAGGCCCTGAACTCCAGTGAACACCCACACCGGCATGCCATGGTACACACAGTCGGAACAACTATAGAATGACGGTGGATTAACCCGAATGGAGTCCGCACCTCAGGTGTACTACAGGTACTAGGTTAGAGCGAGCTCCTCTTTTTTCTTATGGTCAAGGCAATGTACAACCGTCCGATGATCGCGCGTGTTGCGGCTCTTTTGGTCCACAAGGCTCAGGAACGGGCGAAATATCCCCGAGCGTGTGGCAGAGCCTAGGCGTGACTTACCGACGGCGTACCGGCTCATACGTATTTTACCGGGCGCTATATCATAAATATGTAATAGAAACGGAGAATCTTCCCGCCGGTTGGTTTGTTAGCAACTCTTTGTATATAGTTCTGGGCGCAAGTTTACAACAAGTTAAACGGGATTGCGCGCGGTCCGCTGGTATTAGATCCGGGGATTATAATCCGGTACGGGGTACTAAGGGAAAGTCTGTTTCGGGATTCAAGATAACCGAACCCGCCAACCGTGTAAGTTTTACAGTTGCCCTGCTGAGCGAGTCGACACCCCTTATTTAAGTATGTCCTAGCAACTTCGAGGCAAGGCAAAGAGTGTAACGGGATTAGGGCCTGACTCTATTAATGGATTGTCAGAGGTAATGGAAACCACCCACAAGGGTTTACTATGTTTTGAGACTCTTGGCCGACGTGAGCGAGCACAAACCTCCCCATATGGGCAGTCGACGTGTCTAGCCCGTGCGCCAATGAGGCAGGCGCAATGGTCCTACTTGGAGAGCCATACATTAGCCGCCGCAGCCGGACCCAGCCGCCGTCTTTCCGACGAAGTTATGCGAGCAAGCATATACGCCAGGCCAGGCTCCCAGCATGAGCCCAAAGTTTGAAGGGCTGACAGGCATCAAAAACACAATGTGTTACCATCGACCCATCGAACTTCGCTCGCTGGGTCGGTCAGCCCGGACTAAAGAGAGATAGAGAGTTTAGGGCGTTCCTCGCCTTGATTCAAGGGGTAGGGCGCCTACGACTTGATGCACTGTGTTTCACATCAGTATGCGGAGTTGTCAGCCTCAACCAATGGGAAGCCAACAACTGCGAGCACCGTTAGAGGTGCATGCGGTGTCCGCTAGACATAGTAGTACCTTCAGGCCATGCGATTTGGTTTTTGAAACCGGTCATTACGCGTAAGTGAACTCGGCCACAAGTTCGCCGAATGATCAACTTAAGCGTCCCCCACTTCAACTGCTGCACGCCAAGTGAGAACCGAACCTCAGGGTAACACATCTGGGTACACACGCCCTCCGTATTGCGAATCTCTAACACGAGTAATGGCTCACATCCGAATCACCGGCTCAGGTTAAAACTGGTGAACTGTAAAATTCCGATTCTTGGTCACTGAATGCCCGCGCTACACTGGCTAGGCCCGCTATTTCATTCTTGTTGGGACGTAGGGCGTCGCGTAGGGGGGTTGTTCACCCATAAAGCGTAGCGCCAGGTGTAGCCGGCTTTTGTTTCAGGATTATACGCAAGTGTTGGCCCCCTCAACTGGGCGTCAAGGTCGGGATGTCATTCGCTAGCAACAACGTTAGATTACCTCCGTCACCACCCAACCCTGTCAGGTTCTGAAGCATAAGGGGATCTAAAACTAACGTCGGTCCTAGGTCCAGGCACAAGTGTACTTTCGAGGCGACTTTTTCACAACGCAGCAAAACCTTACTGCGATCTGCCCTCATAAGCCAGTCAGGTCGATGCGGGATCGACGTGATTAGTTCCTTATGTGGAACGCAATACGCGTTAGATCTACCACCACGCATTCATCCATCGACGGAGGCAAATCGCTTATTTCGGTCTCGATTCTGTGTGTGGCCTTAGCGGGACCCGGATTGCGGTGAACAGTTATTTCGGACCTTGTCGGAGCCGGCGCCACGTGACACTCTCATTACGTGCCTGATGTGTCCGGATGTATGTACATAAAGCGTCCTTTGTGATGGAACAAGTCGTCGCAATTCAGGAGGTGAGGGCGGCGAGGGGTGTTGGTGACAAAAACCCCGTGGCCAGCTAGTTTTAATTCTTGATAATCCGCGGTCATCGATCCGATCCCACGCTGTCCTGATTTAAAGTACTGGCTTAATTCCACGTAGGGGTTTTCTTGGCGTGTTCCGTTTACCACGACTGTGCGGCTAGATTACGATCAGCTAAGGACTCCGCGGTGTATCCACAACGCTAGTTATCCCCACGCAGGAAAGTAGCTTTTACCATGTGTGAATTGGGGCGTGGCGTTCTCCCGAAGAATCCAGCGCACAGTTTGCCCTATGGGTAGTCCGTACATCCAAGTGCCCGTGGTATCGTCTTGAGTAGCCGCGGTAAGTATTCATGTACGATAAATCGATTACCCCTCGACGAGTATAAACTCGTGCATCTACTTTGGGTCCTTACTGTAGATGGCGCCGTGATCTAGTCATGTCAACGTCTAGCCCGTCTGGTAGCTATCGCCATGAAACTCATTCGAAAGAAATAATCACCCACCCGTAACAGACTTGGAATGTGGATTTTTGCGGATACTGAACCAGCTGTCCGCCTGGGGGGAGAACATCCCAGAGGGCGAGCAATTAGCATCGCCTAGAAATAGTCGAGGGCCTAGTCGAGATCAGAGGTTTATAAAGACCGCAGGCCCTTGGCCAACCGTTACGTCCTGGCCATTACGACTATGTAGCTCCACGGTGTGTTGAAAGTTCGACGAAAGCCGTTGTCAAGTCAGCACGGATTTAGTACCGTAATGTCGTGGTCGTTCCATAAACTTCTTTCCCCCCCCGATTTGAGTGGTCGAAAGACGAAAATCGTAGCCCAGTACTGTCTCCAGCGGAAGCCCATAGGCATAAGAAACTGACGTCATTCCACCACAAATCAAGGGCGGCCTAGACGATGCTTTCGACCGAAACGGTGCGTGAGCGTACAGCAACCGCGCCCACTAGAATTAGGAAGAAGGCTGTTATTGATGCCACTGTAAATTGTTTTTTGCACATGTTTACACAATAAATGTCACGGGGCCGTCAATGGACTATTGCCAAAGCCCACTCTGGTGGGCCGTCATGTGGTGTCGGTTCAGCAAAAGGTACATGTTCGACTATTGTAACCGTTATTCGGCCTGGGGACACCTCCCCAGATATCCCGCACCCTTAAACCGCATATCGGCCGGAAAGATCCTTACGACCAGGGAGCGGGAACTTGATCGACTGGTTGTGTCATTCTCATAACGGAAATTAGCCAGATCCCGCTGCTATATGATGAAGTGGCGTAACACGGTCACGGGCTTACTGTCCCGACAGGGGCCTTAAAGGGAGTTCGCTACCGGGACTGACAGTAGATTGACACTGGTGGTAACATTTTCGATACTGGTGGTATTACTCGTCTCATCTCCTTTTACAGTTCAGAACGTGGTTGAAGGGCTATGCAGAGCACGTGCCAAGTACGAGTTGGCTAGGATGTCTCACCGTGGAGCCGCTTAAGCCTCACGGCAGAAACATCGATTTCTCCTTGGGCGATGTTAGTTGCCACTCATTCTATGTTGTGTCCGTCGTCGTCAAATCTAACTCTATACAAGCCTCTCTCGCAGGGGTTTGATAGAGGTGACGAAAATGACACAGACCATTGGCGGCAACAAAGCCTAACTGCCTCGAAGCTTGTCCCGAGCGTTTCTGTATATACATCCTAGCTCGCTTGAATTCAGAGTCACCGGCCCGTGTGGAAGGCCAGATGTTCGTAGGCTTCTCATCCACTACCTAATTGATAGCTTGTGTTCCAACTCCGTGACAAGAGTTGACATCGTCTCTCCACCCAGGTTGCACTCGAATCGTATAATTTGAGTCCTAATCATCTGGCTCTCGCTGCGAGATGTGGAACCGCAGACGGTCCTGCCCGCTTGGGGGACGGAGGAAAGCTCCCGTTAACTATATCGCCCGTCGTGTAATATGATACTCTTATTGTCGGGTGTAAATCGAGCCCGAATACGTTTTTTTGTCGTCTAACAAGCTCTACCTCATGCTTAGTATCCACCGCCGACGTAGCTCTCAAACATATTTAGTACTCCGCGACGATGAGCACAGTCGTGTGTAGTTGGTTTACATTGATTAACATAATGCCCGTTGGTCGACATGACCTACAATACTATTAAACTGCCGACGAGTGCTATTCTTCGCGGCCCACTTTACGGGCAATCATTCCATTTTCGCTTCCTCGCAGTCCATAGCATTGGAAGCGGTAGATGCCCGATATGTCCGTTGGAGTCGAACAAACACCTGTTGATTTAGAACCCAAAGAGTCGGGGACTGTTGCGCGGGTTCACCTAAAGAGGACACGTACCAACGTACCAAGAGAAGTTTCGGGGGTTTGATAGCCGGCGATCAATCTGTACGACTGTTCTATTATCTAATTGGTAAGCACAGGATAATGCGCAGGGGAGGTAGTAGCGTTGTCTCTATGCCAGTGTGATGTGAAATTCTCGTTAAACGGTTGATGACCTATCGTTTTGGCATTCGCTCCCCAGCCAGGTGTAGTGGTAGCCGTTGTTTGAGGTACTTGTCGCAAAAGAGTCTGTGCCGTGGTTGTCGTGTCTTATCTCCGATATCTCATCTAGAGAACTTATCTCACGGGAGATTGCCGGCGGCTAACGGTCCTGTGCACTCCGTCGTGAGAGTATGGCCTGCTCCGATAGACGAGGTTTGTCAGTCTGTAACGCCGCCGTTTGTGCGCCCAATTTCAAGCCCATCAACCTCTAGAGCCTAGATAGAAACGCTTTGCAGTGAGGGTACGATGGAGGTATGTCTGGGGTTCTGACTAATTCACAGTAAATAGAGTGAAACGTTTAATGGACAGGTTGGGAAGATGTAGATGCCGCTCTGGGTGTGAAGTTCTACGTATCAGTATGAATCGAAAATACACTCTATCTCATATGACCATTAATAAGTCTTTTGCAGGTGTCGCGCTTGATCCCAGCCGTTATGGCCCCCTTTTGGTCATCCGGACAGAACCTGGTATTAAATATATACCCATCGCTAACCGCCCATAATCGTGTGATTCGACCGAGATCTCGTGCCTATCGCTTATCTGTGCATGTTGTCTGTAGATCACCTCTTTTTGAACGCTTCCTTGCAATCGAGAGTAGCACATCAGCAAAGTAGGTATCTCAGCGAATAAGGTTTATAAGATATGTTGCCGTGCGCGTCCCATTTGGCAACTATCTAGAATACTGCTGCGAAGCGGCATAGGTTGGAGGAGCGAGCCATGAACGGGGTTAAGTGGTCTAATGAACCTCGGACATGAACCATCAGCATCTCTTCATCCCTGTCCTGGAGACCTCCTGCGACATTAAGACACGGAACCTCGTTAACCATCGGTTGTCGGCGGCTCGTGGAGCCTGGAAGTATTATCCTGTGTCTTATCGCCGCTTCACTTGTCTTCGGGCCGAGGTCACGCGCCGTACACTAGCGGCAGTTGCTGGTGCGGGGTTTCATCTCGCATCGGTTGAAATTACTCAAGGTCCGGCATTGATACAAAAGGTTTTCGTGATCAGATTGACACGACCGCTTCGACCAGTCAGCCGTTGTACACTTGAGATTCCCGTTACCCGCTTATAGAAGCGAAGGTATGGGATAAAGAGATTGACAAGTACGAGAGACTCGCGAATGGTGTTTCTGACTTTTACCCTAACATTACTGGTACCCAGACCTGTATATACCGGCTCTACAAATCGGGTTATACTATCTAGTCTTCTGCGATTAGGCGTACAAGAATTCCCTAGTTCCTCGTTAACCTAGCATCCGGGCCTGTTAGATTTGACCGAACCAATCCGTATTCTTTCCATGCGTTCCGCTGTTAAGGCCGGTCGAGGAATTCATGCCTAGTATGCATCCTGTTCGGCACGGCGGCGCCGCCGTACGAAAAGATACTGAATCGACTCGTGAACCCAATGTGACTGCCCCCGTCGTCGTTATTTCGGCTCCGGTGATACATAAAGATCTATTCGGCGAACCGCGGGGCCATCAAATGTATCGTACTCAAGCACGGCCGCCCTGAGTGAGTTCCATTATCATGCAGCATGCAAGTGGCGAGTGTCTCGAACCACCAATTGCACCACGCACTCTCGTCCTCCACTACACACCCTCATTGTTGTTCGGGCCTTCGCTCTCAAATGCTGCAGAGCAACAATAGCGCTACCAACCGCATTATGTCCAAGTGTTCTTTGGGGCTCACTGCTCTGAGACAGAACGATGAGAGGACCTCTGAGGTAGTTCAAGATGCGGCCGGCGTGAGCACGAGCACCCGGCCGTCCTTTAGACAATCTTGGGCAGAACCATTTACTATAAGACGTTCGACAGGTGTGAAAATGTGTTAAACTTAGACGTGCCCCGTACTGTCGTTGAGGGTTCACACATCGAAACAGTGATCGAAGAAGCGGATTAGTAGGGCCATGGAGATATTGGTACAATGCATGCGACTCAGCTGTTCTACAAGAATGGGCCAGACTCTACGAGGACCGCTGCCCACGAATTTGACCTCGGAACACGTCCGTCGGGCCCAAGTATCCCAAATCGTTTTGAATATCGGCCGCTCTGAGCCGACGTCCCAGTCCCGACGGTTGCTCCGCTGAGTGACGTCGACTTGTAAGTATTACCGAACATAAGCCCCCAGTATAAATCATCTTAACCACAAAACACCAGACGGGTAGGGCGCTTACGCATACCTGACGCCGTAGACTCATTGGGGATAACAAAGGGTAATCCGCTCTTCATAGGCGACACTGACACTGGGGCTACATCCAGGATTGAGAGCTTGGCAGGGATTAGCGCCGAGTTGTCCCCCATTTCATACAATGGAGATAATATGCACATTGTTACAGAGATGCTAGAATTCTACAAATCATAGTTTGGTCCGCTCCTAATCTCGGCATTTGTGCCGATGGATCGGTGTCTGCCTAAGCGCGTGGCTATTTATCATCACTATTAGATGTATTCGGCAAGTGATGGTCCTTTTCTGGCCTCACTCAAGATAACCTCTAGGTTACGTAGATGACTAGTACTAGTCCTCAGCATATTCAGCTAACGCCGAAAGTCCTTTCTTGGTACTTGACAACATCACTCTGTACAGCTTTCCTGTCGTTAGAGGGGTGGGTATACTTGCGCGAGAATTCCTGCGCCGAGGCGACATCCCGCCGTCGGGACATCTCAGGTATACACGACATCGCACCGTAGGGCCCGGTATTTGGGCTATGCCTGTTCTGCGCCAGCATGGCGCACACCAATGCGTTCGGCAAAGTACAGAGTCTATTATCATGGCGTTTGGCCCTATCGAGTCATTATGATGGACCGATGACGGCTTACAGGAGGATGGGTGTTGTTTTCCGCGGCATCCCAAAAGCCCTTAGACATGTATCCGTTGTTATGCGTACTGCGTTATCTTTAATGTTATTTCAGGAGTATCAAAGAAGTAGTGTTGAGTAATTATAAGCAGACGACCGGCAAACAAAATTCAACGCGCGGCCGCGATTAGCCATGTACAGCTTGTGGTCGACCGAATGAGTACTCCCACGTAGTCATTGATGCACTCTTGAGTTCAGCCGCCCTGCGTATGGACGCTTATGACTATCTAAACCAGCAGGTCCGAATAAATCGGCATAACGATATCCGGTTGGTTATTGAGTCATACCACACGCGGTCTGCAACTGTTCGCCCCGCCATAACTTGCTGGACTTTCGCTCACGTCTTATACTGTGGTCATCCACGCTAGTTGTACTATCGAAAAACGCTTTTCGTGTGCAGTTGGGATGGGCCCAATATCCCTCAGATCGGTGTCCATTTGAGCAGTAGTATCGTTCGTCGCTCCTGCAGAAGCGCATCTCGCCTGAGTGTATAGTACCCTCTGAGGTTCTGAGCAATAAAAGGGCCAACTCGTTTCTTGATGCACCTTCGTAACTCCGTTCCTACACTTCGTCATAGAAAATCCTCGCCATTTTCCTCAGATAATCCTTTTTATGGTACCGGAATCTTTCGGCTATGATAAGCTGGCCCAAAAGGACACTCTCGTTCCCGACTTCCTCGTAAGGTTGCTGACGGACTGGATTTGACGCGGTCATTATAGATCGTTTCCTGAAGGTACGCGCTCTACTCCATACTTTGGGGGTGGTCGTGGTTGAAGTTAAATGATCCATGTGCCTACGTCTGAACTCATTGTTCTAGGATTCGTAGGGGACCCGCCATGCCTCGATTTATATACCTTTAGCTACCTATGCCATCGAAGGTAGAATCCCATAAACTGGGATCACGCTATTGTGAATTCATTCTCACTTGGATCACCCGATATCGATGGTTCGAAGACTGAATGGCAGGACTGGTGATGTCTGCAGCAGTCATAGTCGCTTAAATTGGCCAGCTCTAGAAAGAGCTGATAAGCCCATGGGCCAGCGACGAGTTTAGTATAAGTCGAAGGTACAGCCTTTAAGCATGAAAAACTTACCAAGGTATGGCTCTGGGCTGGCCCAGCACCACTGTTAGTCGCCAGAGAGGCTCTAATAGGTAATCGCCATCTGGGGATAAGCAACATAGGAC +6 TCCCATGGGCTCAAACAAAGGAGATGCCCCCTATCACTCATGCAGGTTTGCGTTTTAAATCTGCGATGCTCCCTTGGATGGTACGTTGCTAAGAGCACGCGAAAACTTGTCTCTTAGTGCGCGGACATTGTGGGGACTCTCATCAACATCTCTGAAGGGGCCTATGAAGCGCCCTCTTTCGGGACCCTCTCAGCGTATCGAAGTGGCAACCGCTACGGGGTCCCATCCTTGGAAGCCATACGCACGGTCATAATTGATGCGAACAGCCCGGCTTGCACAAGAAGCCCATGATCTCCAGCACCGCACCTAGGTCTAGATAGCACCATTCTTTTACTGAACCGCAGCCACCGACGAATGCCCGTTCCCTATTAACGCACAGGAGTTCGCAGTTGAATCTAAGGCCTGCAGCACGAGCACCGATGCGGCTTGTCGGCCATAGGCGGAGGGGCTGCTGGTCGCAGAAGATTTGTACGACTTTACGTATGCCGCTAGGTCTACGGTCAGAGACTCATTGCCCAGGAATAGATTCGCCAGGATGGCAACTAGCGAGACGTAACTTGGTTGCACAGTCCTGGCTAGCAATCAATGTAGCGCCCATTTTGCGACGCAGGAGTACAAATCCCAACAGCGGTACACTCGCGTGTTGAAGGTAACGTACGCCCGATTTCTTGCAGCACATACGGCGCGATTTTCCCCGATTGAAGGTTGTTCTGACCCCCAAGCTTCGGGACGATGAGTTCAACGGACGTTCAAAGATCTTTGCCGCAAAATCAGGCCGTCATGGGTATTACGTGGCCCTCGGTCCGGCTATTTCTCGTATTTAAAGCCATGTCGTATCCTCTTTGGAAGGTGATAAATCCGGGGGGATAGGGCCGTACACGAGACTGAACCCCTAATGCGTACATTGATGATAGATCTGCTGGCTAGCCGATGCTAAAACGGTGGGTCATCAAACTGAGTGCGAGGGTTACTGACACCAGAACCAAGTTCGAAGTTCTGCCCCAAGTTAAACTGTTCTGTCAACTTTGTCTGGTATTCGTGTAACCGCGCATATCTAATCCGTTTTTTTATAAGACGTCAGGAAGACCTTCTCAGGGTGAGCACAGAATTGGACGATCGCCGTGCCGCCCGTTTACTTGTAATCGATCAAGTCTCATTTCGGCACATCATTTCAAAATTTTGCACGAAGCCCCAGACGGTCGTGAGTTAGTATCCATTCAGTCTACAAAGTCGAGAGCACAGGGCTATACCAAAACACGTCTAGTTACGGTAGTTTGTACGAGTCTTTGTTTTATACGCGACACCTGGCGCCTGAGGTTGTATACACGTCGCGCACTCCATCCTGACCTTACCTCACGCTTCTTGCAAAAAGTCGTGCCACCGGTCGCAAATTGATGTTCGAGGGAGTTCTAGTCCCGGCGTGATCATGACGTAACCATACTGCAGCTAAGAAGTCAGAACCGCGGGAGAGAGAGAATTGTTATAAGCCGGCGGGGAAGCATTTCGAGTTTTCCGACGGTTCTGTCTGTGAATATGCCTGCTCGGCACGCCGCGTCCTCATTACCGACCCTGTATAATGTCGAAGTTAAGGCAGCCACCCCGAGCGGCGCCGTTACCCTTCTTCTAGGTGAAGTCAAGCTCGACCGACAGTACTCCAAGGGTTCTACGTGAGTTATTTATGTCAGCAGGCTGGCTCTTGAGGGGAGAACAGCAATAGTCGGGTATTAAGTCTCCGGCATTATTTACAGCTCTTAAGACAACTAGGCGGCCTTTCGCGACCCAATGATGGGGTAAGTGTGCTGAGACGTCGCTGATATGCACCTTCCGATAAACCTCGGCCTCTCGTCATTGGCATGACAAAGAAGAATCTAACCAAGTCCGCGCTCAAGTAATTCAAGAATTTCTGTGAATGCCCCTGCTTGGCTAGCTTAACCTTACGCATGCGCTTATACTCGATCTGAAATATGATTTCGATGTGTTGGCGCGGACATGGTGGTGGCGAATTCGCCTCTTTGCATTTTACCCTTGCAATTTGCCAATATTTGCATATCTGCTTCAGCCCTGACTCCAGGGGCCCATTCGTATCGTCCGTCGTGGACGGAAATACGAGTGAACTAATCGGAGTTTGTGAACATGGAACGGTCCGCACCCAGGAGATCCGTCTAGTTCTTTGTCCTATCAGCGTGTTCCCCCTGGGCGACGAAGGCCAGAGTATACTCACGACGCCCATATGGGTCAAATCTTTAGCTACTACAGCAGTATATCTTACAAATCGGTCACTGGCTTCCACGTGTGAGTCGAAGCAACGATACCGATGCAGCTCGCTGTCCAGACCTTTGTGGCAGACTCCAGCGGAAGTCGGCAAAGGTAACCACACCTGATAACTTTGTATGTCTACTGTTCCTTTAAGACTTCTCCCCCCTCGTTTGTAGGACAGCTTGCCGGACAAGGACATTGGATTGCGACGCCGAGGGCCATTATCCCGTGAACGCCTCTCCGCCCAGAGAGCATGGGTGATTAAATCAGGAGGACTTTATCTTCGTCCCAAGTCAGGCCTCTGATGATCACTACTTGAATGCGATAGGAGCTAGAAAAGAGGGTACAATTGAGTTTCCTTACGTAAGATCCTGATTCTTTGGGGATTGTGATACAGTAAGCGTCAGGAGAACGCAAGTGGGCAAGTCGGATTATTGAAGGTGCCACTGTTGTGGATAACGACTGGATCTAGTTCGCTGACTCAGGACCTGATTGTGGTTCAGTTCCACTCAGGTTCAAAGAAAGTGGGCGACTCCAGGTTTAGGAGTCAGGCCGAGTATCAGATGTTTCCGTGTAGGGTGAGACTCGGCTTCATCGTACGCGTCCAGCTCGACAGTTAGCCAGTCACGGGGACATATTGATCAAGCGGTATCGCAGCCGGCCTTAAGAGCGCCGGGTCCGTCAATCACTGAGCGCGCGCCAGGGGGGAAAGGATACATATGAGAAAGGAGGAACTTAGTTAGCACGTTTCACTCCAGATGGCGTTGCTAAGCGACCAGTGGTCTCGATTCATCTTCTTGAGCTACTTACGGGTCTGTCCTACTGTTTTGGCATAGGGGTGAGAACAGAGCAGCCAGGTATGCTTTCGACCTCTGTCTAACGCCGTAGCGGAGCTTACTAGCGTCAAACTGATACCCATCATGCATTATATCGCAAGACAATTCGGTCAAGCTCATGCGTAGACAAATTCCTCTTTGAACTAGGCGGACGCCTACGATGCGTAATCACGACTTAAAAGCGCCCTGCCGATTGAGATAATACAAGCGACACATACAGTACAAAAAGTCGCTTGTGTTGATCGGTTTAGCTATGTCCATGATTAGAGTGACCTGGAACTGCAGAGCACCGCAAGGCTTTCCACTGCTTGAAAAAACGGACAGTGTCGCGTAAATCGTAGTTAGGCATCAGCGAAGATTGTAGCCGTTCACTTGGACAGCGGAGACCAATAGCGCTAGGAGTGCACCACCTTTGGCAGTCTCTCTCCTGCGGATAAATTTACCAGATTCAGTGTACACCTATTGTTAAGCCATCCGGACTGGTGTTTTGGGACTACTCTACATTGATTAACCAAAGGACAGGATCCTAACTTGTCTTTTATTCATAGTGATCTAATGTCTACCGAGATATTCAATCACCTTGGGTGGGTTATATCAACACTAAGCCAACATGTTGATCACAAACCCGCTGACCCATGGTGCAGAATGGCCCCACAACACTCCCTGGCTACCCGAATAAGTTTTCCATGATTCCTCGGCAATTCTCCAAGATACCGAGCCTGAACAACCGGAGACCCCGCCTAATGACCCTTCTAATATCTCATCCATGTAGTCACTTTAAGAGTAGGTTGCTTTAGGACGGATGCCCTGCTGCGAGTAGTGATAGTGAGTGACACACAGCTAGGTCCTGACGTACCCGCCTAGGCTACGCTCTTAGGGAACGTAGATGATTATAACACAGCACCTTGCGCGGTCGCCCCGCATCGGCTACGTCACTCATACTCTGACCCAGCGTTACAAAATTTGTATTGCTGTTTGCTCGCAAGTCGTTTGTAAAGACTGTACCTTAATCTGCGAGCGCAAATAATGCATCGGCCCAGTCTTCCTCCATTGCGAGACTACAACATAAAACACTGAACTGCCGTTTAAATATTATTGCTAATTCACTTAAATAACGGAAGCAGAAAGCTTCCCCCGAATTAGTTGTTCTTCGGAATTCTAGGCAGCGTAACAGTCCACTAGAGCTCTGGCTTCCCGTAACGCCTGTCTAAGTCCTATTAAGCTTGGACCGGGTCCCACATAAAGCAAGCAGCGGTCCTCGTCACGACATGGTTGGAAACAGCCATTTTATGCACCCATAGTACGTATTGTGATTGGCCACCGTCTCCGATGATCGGAGGTCAAGCCGAGGAAAGGGGAAATTTCCGGGCTGCCCGGTTGAAAGTGTCCGCTCGCATCGTTCGGCTGGCACACTACCACGATCATTGTTGTTGGAGCAGAATAGGGGTCCAAGCGGGTGCACACGCATGTGGCAAGGATTACCAGTATTTAACAAGGTTGGCGGTTTCACGGGTCAACTTTCATATCACCATCCGCGGGATAGCTCTAGGTTGTGCAATGCCTATACGCGGCGGTCCAGGCACTGCCGATCTTAAGAAATGTCAAGCAGTTTCATGGTCATGGTGTCGGAAGGGATTGACGCGCGGGCATCTCCGAGCTGAGACGGCATAAAGCGGCCACAAAATTCGTAGAAAACCTGGAATTATAGCCATGGTGGGCACGATAAAACACAAACAGACGCAGAGCTCTGCATCTTGAGGCCCTGTGATCGTCAACCCTCAACCGTATGACTTATGTGCCGGAGCCTCCTTCAAAAGTATAATATAAGGAATGGCGTGGGGGTTAGTTCGTCTCATGCCCCCCGAAACCGTCGGACCCCGTGTCCTCGTACGGGGTGCCTTCTTGGTTACTACACGATTAGAAGGAAAAGTACCCGACCTTGGACGCCACACATGTGGCACGGGTCACCCACGTAATATTAACGCAGATTGGATCTTACTGACTATTTTGCAGATCAAGGCCTATCTGTGTGATGATCGGGTACACGCACTTCTAATAGGGCGACCGATATGACACACTTCGCGGAAGCTTAGAACCCTTCCCGTTTGTTATCAACGGTAAATCCCGGATCCGACTCGCTACCTCAGAACCGACTCCTTGTTAGGGTTGGCGAGCACGGGGATTGTTCTGTGTAGCACTAATGCGCGTGCAAGGCCCCACTCTCCATGTAAGGTGAACGCCGCGACCGAAGCCGACAAAAGGCCAATGATTGAAAGTCAGACATGCCATAGGCCGCAGTGCTCCTGCGATGTCGAAAACTGCGGTAGACTGGTGGGTGATATAACAGGCCACGGCAAGCGGTCATGATGAGGCCCTAATGGACATAATGCTGTCTACCGCCTATGTACCCGGGCGACCGTGAGCATGTCATTGAGGCACTTCCTTGTCTTTCACGAGTACCTACAATACTTATGAATACCTGCGCTCATCAGTTTGAATACGTTACCAGCGGCATCAGAACAGGGCATTCTAATAAAGCCGTCTAGTAAAGCTACGCACTTACACCACGTAGTTATAGTGAGTTATCCGTAACAAACACGAGTTATAGCCTTGTGAGGCCGCGCACGTACGAAAGTTTATATCTTTTGTTTGGTACATCCTGGGGGATAAATTGTTATCACTCGAGCTGGTTTATGGTTGAAATACAACCCTGCCCTTTCTCAGCTTATACCCGAGTCCCATGTGCACAAACTATGAAAGCGGAATAATAGATCGTGTTGACTTCGCCGAGTTGTAATGAGACATATGCGACGGTGTCAGATTGAATGCCTCTCCCTGCACGCATTGCATACCTCATTCTTAGGTCAACTTGCATTAGCCCCTGTGCTCCCGATCATACCTTGTGAGTAACACATTATTTCTCTCAAGGTATGACCCAGTCGATGCTGGCCTATCTACGGTTGCCTCTTGTACTTCCTTATTCTTGCTTCCGAATTATAGGATTCGGCATTACACATGTCCTTTTCATGCGCCGACTCGATTGAATCCCTTTACTCTTCAGGGCTCGTTTACTGCGCTGTAGTGGAAGTGACACTGTTCCATTTAATCGTCCATGCGAGAGCTCTATTAAAATAGACGGGTGGTAAGAGGGGTATCGACCAAACAAGTATACAATTTTCCCCTAGGATGTTCATGAGAACGTGGGAGGTACTGCCAAGTCCGCTTCCGTTGCGTGGATACCTCCTCTTCACCATTGTTTGCTTGGTATCCTAACGTTGGATGATACGAGTCTACCAGTCTTTCCTATTTACAGTACAAAGAAGAATTCTGTCTGACGGATCCTCGTGAGACGTCCTGCATCAGGCGAAACGGCTTCACGCATTCGTAAGAGAAAGGCAAAGATCTTGGGCGAATAGTAACGGCCAATTACGCTAATTCTCGCTCTGCAATTGATTACACTTGCTACTAAGCCGCTGTGGGCAGATCTATGACTAGTTGTACCAGACATAGACGTTTTCATGCCGACAATTGGCTGGAGAGAGGTGGTACCCGGGACTACTGGCAAGCTCTTATGTGTATTTTGGTCCAGGGTGAACGACGACGGGAGGACGGTCACCTCCTTTTATCGCTGATCGGCTTGGACTGGATCTGTGTACCCCGATGCCTAGAGTTGTGGGACGCTTGCCGCCCCTTAGGAGATAAGCCTGAACAGTCGCCAACGAGCATCAGTGAGGGCAATCACTCCGTGCGCCAAATGCAATGTGGATCCCCCGTGCCTGGCTCTGTAAGAAGGATATATACCGAACAGCGGAAGTCAAACTGGATTTTACTCGGTTCTAAGGTTCTCCGGTTGAAAATAGTACATTCCTGACAAAAAGTTGAATCTTGCCAGTCAGCGGCGGCCCAAATCACATTTCGACATCCAGACGTTTCCGGTGTCTGCAATGTACTTCCATCGAGTACGGGTTTTCGTCACGAAATTTTTACTTCCAGATTTAACCGCGCACGAACCGATAGTTAACTCAATGAGTGGGTCTGATCCCGCTGGGCGAGGCGGCTTGCACTTCGGCGCTAGAGCATGGAACGCAATGATTCAAAAAATGAGTAACCAGGGGGGAACACTGTCATTACATACACCGGTCCGATCTGCGTTTGCATTAGTAGCATGGGAGTGTATCTGCGGGACTCAATTTTAAGCGCGAACTGGGTTGAATGGTATAGGATATGCCCTATGAAATGGATCAGAAATCACCCGGAGCAAGTTAGGAGTTGTGGAGGTTGCTAAGATCCTTTTCGGCAGCAAGGTGCCCGGAATTCGAGAAACCATGATAATATTAAGGGGCGGTAGGCAGCTTGTGTAGCGGCTTGCCACCCCTGGTAGATAACAGAGAAGTCGGGTATAGATCATTTATCTTTTGCGTTCCCCATTAAGCATGGAGTCGAGATAAAATCGTCATACGGAACGTTTGAAAAGGCAGAAGCCCTTAGTTATAGTCTGGATCGCAGCGACTCGCTGTGCGATCTCTCTTTTGATAAGTAGTTGGATCTCGAGATATTTCAGCCCGGTGACATGCGCGATATATGAACTAGGTTTCCATGACGTTGCGGCTATCGCGCGCGCGTCGACGTCTAGCAATATCCCCGAGCTTCCTTGCAATTACCTAACCAGTTAGGGTTGACCGTGGGAATATGTCCGCCGCGTTGAGAACTTCGAGATCCGTGAATGGTCAATGATCAAAGAATCGTCGCAGAGGGGAAGGTGTCTAAAGAAGCTACATCACCAAATGCTGAACATAATTCCCAGCCAAGCCAAACGTTGTGTCTCGCAACTGCCGGTGTTGTCATTCCATAACTTGCATGAGGGAAGGGGACCGAATACAGTCTCAGGTGGGAACGGCATCTAGACCACTACAAAAAAGATACCATCGTAAATCAATCTAGCTATTAATAGCTCGAGAACTAATGTAAAATGATCCTTGCCCTTCAAGAATGAAACCAGGAATCTTGCGGGTACACTTTATGTGTATCGATCATCCGCGTTTACCGTGAAGTTATGTAAATTTGTAAATCCGAGGACACGAGCAGAACGACTTTTAGGCGTCCCGGAGTCTGTTCGGAATGAATGAGTACAATACCAAATGACTGAGTAGAATTTCTGGGTGTCGATTAACACTTTCAAGTAGTGGCAAGCGAGTCACAGTGGTCTCCGGTTAAGCACGCCTGATGACCCTTGGGAAGTACGATTCAAGGCCATGGATGAGAGGGGGCCGGAATTGTCCAAGTTAGTTTATTGAAACCCAAGCGCAATCTTGTTGTGTGAATGTATTAACAAGCGTCGGGAAAATCCTTTAACGCAGAGGCGTGTATGATTATCAAGCGCTCGAATCTTTTAGGCGGAGTGTCACCACTAGTATCGCGAAAGGGATTAGCAAATTGTATTCGCTGAGGGCACAGTCGTCATAGGTTTCAACCAAGCCCTCTCAGTTTTCGCGTGGGTTACTTTCATGGACTTCTGCGGGTCCGTGTTGGTGCACTCATGGCGAAGCTCACGGTGCTATTGGTAACCTAATTAGAATGAATTATGCTTGGACGTATGGTTGTCCTCCCCATTTATGTTAAGACGGCGCCCCTGTCAACTGGATATTACTACACCCAGTCTCTATGCCTCTGAGACTAACAAAACGTCTGTGGTACCATATTGTAGACGCTCTCTTCAACAAGACCACACTCACTCACTCGGGGGAGCTGTGGCGAGATTGACCCTTTGCTGCAGTCGTTGCCCGATGTGATCGGTGGCGGTAACCGCGAGATATGGTGTTTACTTATAACTGGGCACTGAACGCGAACTCGGCAAGACAGATGCCCTATACTCGTTGTGTTTAGGGTCGTCGCCAGATGCCCAGATTTAACTCCATTCGCCTGGAACATCGTTGTTGCGCGTCGTCACGAGTTGCAGTGCACGATCTTCGTTTACACCGCGCGCTAGGGGAAGGCGGAGTGTCGATAGTTACGCGAGCTGTATTTACATCTAATCACTCAGACTAAGCCCCCTCAAATCCGCTGGTTGACAACTCAGCTGAGGTCCCATAAAGTCTATCTGGTATCCCATGTAGAGAAAGCAAGCGGACTGCGCCAGGAGGAGCTGTGGCAAATCGACTAGAGTAATGCACGATGTATGAGGCTACGTGCGATACTACTACTACAATTTCCGGTGTTTGGCGCCGGGGGGGGCATGCACGGGTATGCCCGTCTATTCCCTGATAGTCACGCGGGCATCCTAGAGTTTCATATAGTCACCCTAGCCTGCTTTTACCTTAAGCATAGTGCTACATTCTTGTACGAACCTTTGTTCTGGCGCGTTTCTCATACCTTGTCTGGTTTAGTTACACCCGGGGATTGGCATTGTATACAATCCCGTCTTCTTCGCGCCTATGTGGATTTAATCGAAGGAGGATTTGTCGCCACGTGCCGCTCCGTCATACGATATGGCAGGCCGGTGCCCACCTAACTAAACTATGGCATAATAATCGCCGACTGGATGTGATCGATGTACGCGGAAAAAGGTAGCCTGCGTTATGAGCTCTGCTGGATACGACGTTTTTCGCTTCGATTGTAAGACGAAGGTATCGAACATATATCCTTCACTTGTAAGGATGGCAGGCCGATCAGTCAGCTTGAATCGTACTATGTCAATTGCAATACATGTAGATGGCCGTCCTCTCGAAACCGACTGTTATCGGAGCTTTTCTTTGTGGTATTGAGTGTGCGGTTGTTCGATGGTTTTCCTTTATTACTGGGCATTATGAGGGTAATAGTGTACATCTACATTATGAAGCAACGGCGACTGGTTGTAGTGTTTCATACACATTCACTCGGCAATTTAGGCTTTTCTCAGCCTGTTGTACCGC +7 TAGACGGTCCACACGTCTGAGATTAACTGACCTCCTAGCATCAACATTTCCCTGAGCGAGGTAAATTCATTCGTAACAGCCTTGATGCGGCGGGTTTGAAGAGCTCGCGGCAACTGACACTGCATTTGACATTCATACACGCTTGGCTCCTATCCTAGACCCCTCGTCACTAAATACCCAAGTCGGGGAACAACTTACAGGAGGTCCGGCTGAAATTTTTGAATATTTACAGAAGTAACTTATTATTTTGACGTCGTACTTGTTAAGAACGTTTAGCATGACATCCTAAGGACTTAGCCTGAATACTAAACTACATCCGGTCCATGTTTTCACCGGCCCACCGAAGAGTCTCACACAAAACGTTTCCTCTACATTCCACTGTTATGTTTTGATTACGTGAATTGCGCGGTGACCGAGAGCGCGGACAGGCGGATCGCATGTTATCAAGCGACCCCGCCTAGCCGGGCTGAACAAAGCAGTTGGGTTGAGAGTTATCCGTAAGTTCTCTTCTTTCGACGGCTATAGAGTACTGGCTGGACTAGCTGATTGTTCATTGGAGGATTAGCCTGATTGACGGAATCTAACGGTACCGTTCTGGCCGGCCAAGCATGCAACTTGGAAGTGGTTAACGGTTCGCTTATTTCGATTCAAGTCTGCCTTAAAGGGCTACGGGGCCTTCCGCCCAAAAACGGCAAATCCGGTGTCATGTAATTACAGACCGATCCTCGAGGGGAAACTCGGAATCCACGTTGGATCGGAAGGCCGCTGCTGCCTACGCACGGACGAATATGCCGCTCGTACACAGTTATAATTATATGCTCAGAGACTCCCTGTCGGGTTCCTAAATCCCAGTGCAATACTGTTCCGAATTCTATTCTGGCGAATGTAAAGCGCATATGCCGTGGGATTGATCCGATCGACTCGTACCGATACGCTATTTGGAACCACTCCAACGCAGCTGCAACTCTTCTGGACAATTGTAACGAACCAAGAGAGGCTCTACGACTTTACACTGGCACGGCAGGCCGGCTGGTGCAATCGTCAGAATATACGCTGGAAGGAAAAGTTCGTGCCGTCCTTAGCGAAGATTGGCCTTTCTTGCACACCAGTGGGGGATTTGATTCTCCTCAAGCAGACGTCCTGACACGGGAACCAAAAACTCCCAGTCGGAAATAACGAGAATAGATAGCATGATCAGAAAGGACCAACAGGAGCGTGACGCTGAGCGCTGTGACCCGAAGTGTGAGACTTTGGTAGAGCGTTCGTAAAGCGCGCATAATGAGAAGTAGCCCTAGGGGGTGTTCAACCGCGCACGGCACCGACGATTCACCTGACCGGCGGATCAGCCTGCCTTCGGTGTTGCGATTGAATATTTACCAGTTAGAAACAATGTTCCATGGCAGACTGAAAGTTAGAAGGCGAACTCGTCCGGGTGGCCTTATGACGTTTGGACGTCACATAACAATTTCTCGGCCGGGAGAGGTGAAAGCTTTCTTAGAGTGCAACACTTTGGAGCGGGAACGCGGTTGCCATAGCTAAATAATGAAATAGCTGTACTCCTAGTAGAACTCCAGTATTGTAACTGGACCCGGTGTTTCAGGTGAAGAGACGAGTCAGCAGAGTTAAAGCTTATTTGTATGAGTTAAGCCCGCGAGGGACCAGATGGAGTCCGAGCCTTAGCGTGAACCCACTAACAAATGACCGATCGCTACATAAAGGTGGGGGTGGTTGTCGGACAATTGGAGTTGGTAACACGTCCTACATTCAAGCGGCTCCTAAAACATGAATGGCATATGATATCAGTTGGTGCTGCACATCTGCTCAACTGGAGCCCCATCACTCAATAAGGATCCTTGATGGCGGAAGAGCGTTGTGGTCTACTGGACCCGAGACCTTCTTGCTGGTCTCGACAGTGAAAGCTCACCTTAACTGCCTACTTTAACGAGATAGGACCTTCCACGCCCCGCGTGCGGAAAGCAATCACTCTCGCTAACCCGATCAATAATTTGAGGAATACTAAAACGGCAGGGTGCCGTCGAGCTATCTAAAATGACTAGCCACCGTCTCGGAACTTAATGGCTCTGGAAAGTATTCATGGGATGTATTCGTAGTGATCTTGACGTCGTTACGGCGAGTATTTACCCATTGAGGTCCATTGCAGACACGGACACCGCAGCAGGCCAATGCCGCACGGGTTCTAGAATTGGGTACGCGCATGCAAGCATAATAACGTAACTAGGAGAGGACCATCTAATGCGCTTGATGCTTAAGTATGGGTCGAGATAGAAGCCCGCGTAGGTAGTAGCGCCGGTCGCTTGCTATCATACCCATCACAACGCTTGTATACATTTCCAAACGCCTAAGGGGTCGGAAGGGGGCACCCCCGGGACATACTCCCCACACCCGCTTCTATGAATAAGCTGGAAACGCGCGATTATAAACGAGGGGCGTCCACTATAAGCCATCAATGCTCGCAGACCTTGAGAATTAGCGATCAGCTCAGGGTGCCGTCACATAATACAGATGCTGCTGCATCGCGCAGTCCAGTTCGCTGTTACATCACGAGTACATCGAACCGCAGCGAGCTATGGCCCGCATACTCTTAAGAAGAGGCAATGTGCACCGCGCTCTCCAATAGATAACGTAAGCGAGTACAGGGACATTTTTCTGCCTTGAATGCCACCGCCACTTGAGTACGAGCAGAGTATTGCATCATGCAACCACAGATCGACCTGGGGAAATTACGCCTTTGAGCGTTGTATCTCACGAAATGAACTACACATTAAGGCGTTTCGAGCTAGCGTGCCGACTCCAGATTTTGAGCCCGGGATAAATTAGTGACAGATGCCGAATACAAACGGGCGATCTGTCTTCAAGGCTCTTGCCAAACCACGAGGCTAAATGGGACCGAGAAGTCACCCAACCCGTTCGGCAATTGTGGGTGCTTTACCTCTAACACCGCGACTCGGCGAACCAGTCAACGAAACTACTTAATCTCATATTCGCGCGGCGCTCCCGAGATCTTCTCGACACGTAACAGGAAATATGCGTTATCAACTCTTTTCCAGTGTGGTTAAATGCTCATACCTGGAAACAATGATGCTCAGCGTAATAGAGAAATGTGCACGAGCCGTAGCGCGGCTCCTGAACCATACGGAAGTGTCCAGGAATAACCCAGCCTATCTTCATGAGGGGACTCTGATAGGTACATTACCAGGAACTCTTTGACATCGTAGAAGCCTTTGGACAAGCTGAGCGCGATCCATCACACACTTTGACTGGCTGGGGGGCTAGTCCCTGCGAGTATCCAATCCGGACACACTTGCATGTTTCACATGCGTGTGTTGGGGACTGTGTTTAAGCCTCCTAGCTTCAGACCGTGGATCTCGGGCTGCCTGTTGTTCTGGCTCAGGCTGATACCTGACAATGGCTCAAGTCGCGATTCCCCGCGGCGTCCCTTCTGCGATTCTGAGGAAAGACCGTCTAAACCTTCCGGTCATGAAGGACCTGAATAGGGGCGCTGATATCCGAACTAATACCCCATTTTGTCCAACTAAAAGTCGGGGCCGAACGAGTTTCGGTCGGTTTTCTGGAAAACCAAGGGTATGGTATTCTCATCAACGGCTCTCAACCCAGATCAGTGCTTCTGGCTGGTCGACGAACATTCCATGGCTAGGGGACTGATCGTCGAGCGTTACAAGAGCTTGATAAGTAGTATATAGCCCAAGACCTTCGGTACATGCATGCCTGGTGATTGCAGGAGTTCTAGTGTTCGTGGTTAATCTGCGATTACGAAGGACCGTTAGAAACAGCGTCGTCATCGTCTCGGACAGTTGGACTTTCCCACTACTATAGCCTTGCTGCCCCGCCACTTGTACGCGCTTACTGATAGTCTTGTGTGACCGTCTGGCTTATTTCGTTCAGTAACTCGAGAAGGACAGTGACTTCTAACACTAACGATCACTTATCCCCCCGACGGTCGGTACTAAAAATATTCACGAGCGTTTGTTGGTGATCTCACAGTACCAAAGCTGGATTGATATAGGCTATGGCTAAGCGGTTGGATCCGTCGGTGACTTGCAACGGGCTCCTCTACGGCAACGGTATCTGATACCTAATAGTGTAGTGCTACACCATAACAGAGCGGTATGCTAAATGTGATTCACTACCGGAGACGTTCCGGCCTTCCAACCACACGTCTGGCGCACGTACGAAATACCTAGCTCTTTCCCCTTTATCATATAGAGCCTCTCTTAGCGGACTGTGACCCGTGTCCAGACAAACATGGAATTCTCGTATTCGCTATCGTCTAGAACTGCGTGACTAAACGTCGCTATAGTTGTACCGATCTGGCTGGACTCAGCAAGAGAATACTCGCGCAAAAGGTGACAGGCTCGCACGAGTAACACTCACGCTTCTATGCGACGAAGTTGTTCGAACCCTCGTGCGGGACTAAGCGCAATATGCGTGCGTGAACGAACTAGGATGCCGTTGGGTTAAGCCCGGTACACTCATGGGAGGTCCTTGAACAACTTATCGGACCAGGGTGCTGAATCAATGCTATCCTCGTTATGATCTTTCCCTCACGCGTGTATCCTCATAAACCGTTCACGTTCACTCGACAATCGCTGGCTTGCGACGTGTTTGTGCGCTATGTTTGGTTACTCGTAAGTCGAATTGATCCGTCAGTCAGTACCAAATCAAAGTCATTCTCAGGTCTTACCCGTTGCAGCGTGTAAATCTACCCGGGCGAAACATGCGGGTCCTATACTTGCGTTATAGTATATTTTTAGCAGTTTCCACCCTCATCGGTAATCTAGGGGGGATCGCATTCCCGCAGAACTCTATCGGATGGTAGGCCCAAGCAATGAGTGAGTCACGCCGCGCATCGCTACATTAAAGAACAGGAATGCGATTAGATCTAGGCCTAAGTATTCTGGCACTTACGAAAGCGGTACAGAGAAAGGTCGCATGCTTGCTGGATCGGTGTGATTAATCGTTACACCTGTCCACATCCTGCTAATCCGACACCCGGTAAGGCTTTCCATATATGAGTAATTGTGTGAAACGTGCGCAAAAGTATGCATCGAAGTTTGGTTGCAAACGTCACTGGAAGGGGCCTGCGCCCAACTCGTCGCAGGGAATAGGGGTGATCGATCGAGCGGCACTGGCCATGCCTTGAGCCCAAGCGGGAAGTTGGATCACAAAGTGAGAGGCTTCCCGATGTAAAGATATGTCCCTGCACAAGAGGGGGGACGGACCGGATAAGCTCGGGTTCCGGTGTGGCGAGACATTGATTTGTGAGTGCATATAAATGGCTACGAATTCGCAACTCGGTGCCGCCGCTCTCGAAACTTAGGACAGAAGATATCGCTTTGAAATGATTAGTTCGCAGCAGAGTGTTGGGTGATGTGTCCCTTGTATGGGTTTTGCGGTCGGGCAACCCGGCGACGTACATTGCATTTGTTATTTGGTATTCCCGATCAGTTTAACGTTGTCAAATGTTCCAGAACCGTTATGGGCGTGTATATGGGATAGTACTGTTGGTGTCTCGCTAAACTTCGCACTTATTCGATTCTGGAAAGCCCGCTCAGTCGGACGGAATTGACAGAGACCCAGACAACCCTTGCGGCCAACAGCCTGGCCTGCCTCCTATTAGTGACAAGTAAATGTTGCCAGAACTTGCGGCGTTTTACCGCAAGCGGTTCCTATAAGTACCGCACGTACGTTGGTCCGTCTCGGCTCCAAAGAGACGCTACACAAGGTACCATCACAAATTGTGAAGTCACTAAAATTTCCTGGCCATTGCAAAGCGCGGAACTACCGTTCCCCACAGCAGACGCAATCGGGTCTTTAAGCATTGTTCGTACGCCGTCATATCTCCTCGCGGGCAACTGCGCAAATACGAATCTTTCGCCATGCTTTTATTGGGAGAGTTAATAAATGGGCGTCTGTTTTGCCGTTGAATGGTTTATATTGGCGGGCCCGTCCGGTTCTATGGTCCGTGCTATGAATTACCCGTGATAGATAGTCGGCTGGGGGCAGGAACGACACACCCCCACTAATCCAAAGTTAGTGCATTCTGGCAGGTCAATGAGCTCGATTCTTTATTGTACGCGGTAGGTGGTGCCTCAACCAAATCGCTTGTTACATGCGTGTACCCCTGCCCCTACAAGGATAGTGATTGTTGTAATAATTACCGGTGGGAAGCAGTGGTACACTTCCCAATCTTTACTGAAAGCCTGGTTTACTCTCATTCGTGACTGATCGCTTGGCCCCGATCTATACCGAGGTATTATAAGGTATAGGGGCCGCACATTTATAGCCCCGTGGTAAGAAAATTGTCACTCCTACCCGACACGTTCCTACCACCCTTCACTGTCTTTGCCCGATTTTACAAGGGGTAGCACATACACCCCGCGTCCGTTCTGATCAGACCATTGCCATCCCCCACCGCTCATGTCTTACGTTAACCACTCTGTGCCTGTTATCTTTAATTTATCTGGCGTCAGGTGTAGTCCCAGAGACTAATGATGTTACCCATACTTATGATTCCCAATCATGTGTAATGAATATCGCTGAAATTAGAATCCCGATTCGCAACTAATCCCAGCGAGGCTCACGCGTGGATGAAACTAGCGGTACCGGGCGGCCGGTATTCGGACGACTTAGCAAGGTTATTTGAGAGTTCGGCATGCTCAACCTCACGGTACTGGTTAATGGATGTGTGGGTGGCTTCTTACTATTTAGCGACTGGGAGTATTCATAACTTATCCTCCCGCCAGGCAGTCGGAGGCCGCCCCTGTGAGACCTCCGCCCACCTTTCTGATTTGGAAAGTTACAAACACGTGCTGATAAGCCGATGTTCCCCGATGGCCTATCTTGTTACGGAAATGCCCTCTATCGGATTGCAAATAGGCTTGCGTCAGAGATGTGGAACGTTAATACGAAAAAGCATTCAGTCGGTGAGCGTGTGCGCCTCTGTTACACATGTTGTCTTCCGGTGATCGGAACTGACACGTTCATTAACGACGAGAGACACGAAGATATACGTATACTGTACAGATCGGATGGAATAGTGTTAGGATCAATTACAAAGGGGTCAATGAAGGTGAGGCACGCAACGTTTGATCTTATGTCGGAAATCGTGTTGAGTAGTCATGCTCTTTCTCGGTCATGCCCTAGCACAGACGCTGAGGTTCCAAGGCAGACGTCAGCTCTCACGATCGCGCTTCGCCCTGTGCGTGCTAGTCACCGCTTGCTAAGGCCCCTAGGGCCCTGTGATTCGCTGCGATCCCTAATCCCCACCCGTAACTCAAGGCTTGACGGCGGAACGGTTCCCCTAATGTACGATGGTAATGCGGGGACCCAATAACCATACGCCTCCGATCGTGGACCAACACAATCATTGCCTGAAGCGAGGATAGGATTAGATACCTGGAACGGAGCAATTTTGAGAGGCGTATGCGGTGGATTGTTTGGGCTTTTGTCTATCACTGACGGAATCATATTGCATCACAATACCTGACGCCTTTCGGGGTTCACTGGTGGGCAAGATCTACCACAAGTCCAGCATTACTCAGCGAGTGCTCCCTAGGCGTCATTCACGAAGCTGTAGGGGTCCCTACCCTCCCTTCTTGAGTTGGGTGGCCGCCTCTGTAGATGGTCTCACCACAGCTATTAATAATGATAAGGCACAATTGTGACCAGCGTTGCGCACTTTGTAAATTAGGGAACAAGATATGGGGTGTGGGCATTATGATTAGAGGGCCTGAGATCGATTCAGTCGCTGCATTTAATCGGATGCATCTTGCTTAGGGCTGTTACTTACTGTATATTGTCACGCGCACCATTCCGAGCCATCAATGGACATCATGCCCTGTAAAAAGGTTGGCCATTTTAGAATGGCCCAAGCCATCTTGGAGACCTTACAGCTCCCCAAGCTTCAAGCAAATGGTTCGGACCATAGAATGTGACCCAAACGAGTATTTTGCAATCCTCTCACGCAATGATCAATTTATATAATCTTTGAGCTCATGGGCGCTCTCAGGTGAAGGGATCGCCTGCTACTGTAGCTTAATGATGCCTAGGTTGAGCACACAGCGTCGTTATTGTGCTATGGACACGAAAAATGCAGATCGGCCAAAGGGAGACATAGTCTTACTTTAATATGCCTACGGTGATCTCCCGGCGAAAAGCATTAATCAGATGGCGATACAGAGGTCTCAAACATGGGGAATCCAATGAAGCTGGTTAATGCTAAGGGGCACATCTATCGATCCCTGGTCATACCGGGATCTTACTAAACGGGGACGAACTTATGATCATCGAATCATAATGACTATTACAGAGGCTCGTTAGTTCATGCGGGCTACTGATGTCTGATATTATTAACCGGACCCAATCAGGCAAGTGCGGTCCTCTGACTCCGTATGGCTCCTTACGCGAGGACTCAAATATACAACGCTGCGTACTGCTGTGTGGAGGCTCAACTTTCTCGGTGTTCAACATCTGCGAGTCGTCGGCACCCTCCAATACAGGGGTAGGGGACTGACTTGCTTGGTAAGTCAAGGCCAACTCGGTTACCAACAGAGAGGTGTTAACTACTGCGTTCGAATCGTTGATATCGAGTGTGACTCTAACACGCGTGATCGTATGCAGATTTCCATAGCCGGTTACATATTCATCCCGTACGAGCGAGTGGACCAGATATTGACATTCGCGGGCGCGCCGTAAGGTCCTCTTGCCGGAGCGAATATGTTTCCGTACAGGAACCGTCCTTGAATGTCAGTCTATTTGGGCGAACGTGAGCGATTTTCTGGCGGATTATCGGATCAAGTATAAGCGAAGGGGTCCCCGTAGCTGTGCCATACGAACGGGTGTGCCTCGAAACTGCAGCCCCCTACACCTTATCGTATGATCAATTAACTCTGGGCAGAGGCTGCTTGACACGGAGAAGACACGCTAAATGGACGTTCAGAAACAGGGACGGCTCAACGCGTCATCGGGAGAGAGGTGGGGTGAAGATGTGTCATACTCCATGCATCTGCGAGGCCTATCGGCGCTAAGCGGGAAGTCGTCGACGCGCTAGACAGATTTCACTTTAATGAAGTTCACCGCTCAGCAATTAGCCGGACAGTGCCCGTCCAGATAAACTTAACTTTGGGAGGAGAAGAAATAACACAACGTAACGTACGTCGCTTCGACTCACGGTCGTTATCTGATAAAAAGGGCGGGATGCGCAGTTTGCTAGATGTTGGTACTCTCTAAGCCTGGTACACCATTTGAAAGACTTACACTTACAATTAACCGGGATTAGGTAGGATACTCGCAGTACGCTTACCATACTCTGGATACGGGGAAGGAATTCGGTTTCTCTGTTTTTCTTACACTTGGGACCTGGCGTATCCATCTGCGCAACGTTTTGGCGAAGTATATCCAGGCACTAGCGTGAGAAGATATCGGCGCTTAATCCCGGCTGTCGGATATTTTGCAATTGGTTTGGCATCATCGTCCTAGTATCTAGTCAAGTTGTCGTTAAAGCAAGCACGCGAATAAGGAGGCGAATACCAAATGTTCGGATTAATAGAGCCCGATAAACTTCTTCGTCCGTTGGCGGTCCCAGGTTTGCCCGGGAACCAATAAACGTATTTCTAAGACTCTAGTAAGTCCGCTCTCCCATTCTTTCTGAATAGTACTGATCGGGCCTAGGCGGGTCGTAGCGCCTGCTTAAACTCTCCGAGCTCGTCCACGTTCATTTAAACGAATACCCCGTAACACCATTCTGCACATCTAGCACTCGTTGGAAAGTTAGTCCAATCCTTCTACCTGGCTATCCTTCTCATCTTCACTGCTGACACTGCGTGGACCAAATATTTGTTATCCAACTTCTGTGACCGTGATGGCTTCTGTTAGGAGGATAGCATGTTCACGGAGCAATAGATCTTATAGCTCTTCTAGAGTTCGGCTATTCTCCCCGTGAGTCACCAGACAGCTCCCA +8 TTGACTTTCCATATGTCACAGATCAACTGACCACCTAGGGTCAAAGTTTCTCCGGGGGATGTACATGCATTTGTCACACTCTTGACAAAGCGGATTTGATCGCCTTGCTGCAATCGACACTGTAGGTGACATATTAGCAGGCTGGGTCCCTATGCCAGACCCGTCGTAACTAAAAGACCGAATTCGGGAACTTCTCGCCGGAGTCCCTCATGACCCGTTTGAATATTTGCAGAAGCACCCTCTTTGTTTGACGAAGTACATATTAAGAATGTTGTAAGATGCATCCAAAGGACCTAGGCTGAAGACCACACTATATCTAGTACTAGTTCTCACCTGCCCAACGAACAGTGTCATACCTATCGGTTGCTCTAAGTTACACAAACATCTTGTGATTACGGGAATAGAGCTGAGACCGTGAGCAGGGAGAAGCGGATCGCATGGAACCAGGCGACGCCGCCCAGCGGCGCAAAACAAAGCCGTTGGCGTTACAGCTATCCTTAAGTTTTCTCCCTCCTACGGCTCTAGTGTACTGGTTGGATTAGCTGATTCTACCTTGGAGGATTAGGGTGAGTTACGGAATCTAACTAGAGCGTCCCGACGGTCCAATCTTACATCGTGGAAGTAGTTAACGCTGTTCGTATTGCGATGCAAGTCGCCCGCATAGGACTACGGTGCCGTCTTCCCAAAAACGGCAAAACAGGTGTCAAGTATTCACTGACCTAACCTGGAGGGGAAACACGGGATCCTCGCTCGGCCGGGGGCCCACTGCTTCCCACGCACGGAAAAATCTGCCCCTGGTACTCACTTATCATGACAGCCCCAGAGACTCCCAGTTGGGATCCTTAATGCCAGCGCAATACTGTGCCGAATTCCAGTCTCGCGATTGTATGGCGCAAGTCCCTTGGGATTTTTCCAATCCAGTCGTACCGAGGCGCTCCTTGTATCCACTCCATCGCAGATGCAAGTCGTCTGGACGATACTAAAGAACAAAGATAGGCTCTACGAATTTGTATTGGCACCGCCGACCGGATGGTGAGATCGTTAGAAGATACGCTGGAGCGAAAAGTTCGTCACGTCGCTAGCGAAGTGTACCGGAGCTTGGACACCAGTGGTGAATTTGCTCCGCGCGCAGAAGACCATCTGACACGGTAACTAAAAACTACGACCCCGAAATAGCCAGAATAGCTAGCATGAGTAGAATGGACGAACAGGAGGGATTCGCTGAGCGCGGTGACCGAAAGTTCGTGTACTTGGTAATGCGTTCTTAAATCCGGCATAATGAGACGTAGCACTAGTGGGTACTTAACCTTGTACGGCGCCGACGATTCACCTGACGATCGGATCGGCTTGCCTTCTGTATTGCGGATGTAGTTTGTCATATTAGATACAATGTTCTACATCAGCCTGAAAGCCACAAGCGGAATTCATTAGTCTGGCCTGAAGGCGTTTGAACTTAACTACACAATTCCTGGGCCCGGAGAGGTGCAAGCGTTCTTACAGAACACTACTTTGGAGCGGAATCGGTGTTGCCATAGCAGAATACGGAAATCTATGGACTTCTAATAGAATTCCAGTATGATGACCGGACCCGGTTCTTCAGGTAATGAGACGTGTCGGCAAAGTGTAAGCGTTTATGTATATGTCAAGCCACCGATGGACCAGAAGGAAAGCGAGCTCTAGGCTGAACTCACTATCAGATGACAGATCGTTAATTAACGAATGAGGTGGTGGTCGGATCATTGGTGTTTGAAACACGTCCTACTTACAAGCAGCTCCTAAAACAGTATCCGTATGCTTTAACAGTTCGTGCCGGAGTGCAGCTGACATGGAGCCCCCTTAAACAGTAAGGATACTTGGTGGACTAAGTGGCTTGAGGGTAACTGGACCCGACACCGTCTTCCTGGTTGCGACAGAGAAAGCAGACCTCAAGCGTCGCCAGTAACGAGATAGGAGATTCCAAGCTCCGAGTGCGGGAAGCAATCACTATCGCTAACTAGAGCAACAATTAGACGTATCCGAATACGGGCGGTTGGAGCCGAGCTATCTAGACTTACGAGCCAAAGTCTCTGAAGTTATTAGCTAGGGTACGTACTCTTGGGTGGCATCCGTAATAATCTTGCAGTCATTACGCCCAGAATTTACCCATGTAGATACCTTGCAAACACTGCGACCGCTGCACAACTCTGCATGAGGAGACCTAGATACGGGTACGCGCATCTTTGGCTAATAAGCTAACTACGAGATTACTTACTAATGCGACCGATGAGTAACGAAGGGTTGAGATTGACGCCCGAGGGTTTAGTAACGCCGGTCGCCAGCTGTCATACTCGTGGCCTCGTTTGCATACATGTCTAAACGCCTAGGGGGTCGCCATATGACACCAGCAGGGCACAGTGTCCTGGCCCGCTTTCGTGAATAAGATGGAATCTCTCGATTCTGACCGAAGGTCGTCAACTATAAGCCATCAATCATTGGGGAGATTTAAAATTACCGATCGGCACAGAGTGGGGTTACTTAGTACCGATGCTGCTGCCTCGGGCAGAGTGCCTCGTTGTTACATCACTCGGACATCGCACCGCAGCCATGTGTGGGCCGCTCTCTCATAAAAAGAGGCCTAGTGCACCCCGCTGCCCAATGGATAACGTACTCGAGTTCCGGGGTATTCGTCTGCGTTGACACCCGCCGCCCCTTGTGTTCGCCCATAGCCTGTCATCATCCTCTCACAGAGGGACCTGAGAACATTTCGCCTTTGAGCATAGCAACTGACGAAATGAACTAATCATTTAGCCGTATCGACCTCGTGTGGCTACTCCAGATTTTGAGCCGTCGATAAATCAGCGAGGTATGCCGGAACCGATCGGGATGTCTATAGTCAATGCTCATGTCTATCTACGAAGATTAAAGGGAATGAGGGGTCAGCCAACACGTTCGTCAATTTAGGGGGCTTTACCTCCAATACCGCGACTCGTAGAACCAGGGACCGATTCTTCGTAATCTCACTCTCGCGCCGCCCTCCGGAGATGCTGTCGGTACATACGGGGTAATATCAGTTATCGACCCTTTTCCTGTGTAGTTGTATGCTCATACATAGAAACGCTTGAGCACATTGTAATAGTCAGATGTGCGCGAGCCCGCGCGCGCCTCAGCCACCATACATAAGTGTCCAGGCACAACCTAGCCTATCTTTATGAGGGGCGTCCTATAGGTACATTCTCAGGCACACTTTTGCATCAAAGCAGCGTTTCGACATAATGTGGGAGATCCATCACACACTTTGGGCGCTTGGGGGGTAACTCCCTCACCGTAGACAAACCGCACACCCTTGCGTGTGGCACGTACGCGTGTTGGGGCTTGTGATTGAGCTTCGTAGCTTCAGACCGAGAACCTCTGGCTTGGTGTTGTTCGTTCTTAGCGTGATAACTGAAAGAGGGTCAGGTCCCGACTATCAGCCGCGGGCCGGCGCCGATTTCGTGGGAAGTACGGCTAAAGCTTCGAGTGATAACTGAGCCGAAAAGGGCCGCTGGTAGCCCGATTTACACCCGATGTGCTCCGTCTGAAAGTCAGGGCCGCATGAGTTTCGGTCGGTTATCAGGAAAACCAATCCTATGGTATTCGCATCGCCAGATCTTAACGCCACAATGTACTTGAGGCGGGTAGACGAGCATTACATGGCTAGGGGACTCATCATCAAGAGCAACAAGAGCCTGACAAGTAGGCTCTAGCGTTAAACCGTCGATACATGCATGCCTAGTAGTTGAAGTAATACTAGCGTGCTTGGTTAGTCTTCGAGTTCGAAGAACCGTTAGAATCATTGTCATTATTGCGTCGGACAGTTGGAATTTGCGACTTCTATAGCGTTGCTCGCCCGCCACTTGGCCGCGCCGAACGCTAGTAATGTGGGACCGTCTGGCTTATTTCTTATGGTAATTCGAGAAGGACAGTGACCACGAACTCTCTAGATCGCTTATCCCCGCGACTAACAGTACTAAATATATGAACGAGCGTTGATTTACGGTCTCACAGTAGCAAGGGCAGATGGACGTATGATTTGGCTCGTCGGTGGGATCCGTCGTATACTTGCCACGGGCGGCTCTACCGCTACCTTATCTGCTCCCCAATTGTGTAGTAATGCACGATGAAGTGGCGTTATGCTAAACGCGATTCACTACCGGAGCGGTTACGGCCTTCTAACCACACTTCAGGCGCACCTACGAAATACCAAGCTCCTTCCCCTTTATAATACAGGGCCTATTGCAGCGTACTGTGGCCCGTGCCAAAACAAACATCCTTTTCTCCTATTTGCTATCGTCTCGAACAGCCTGAATAAACTTCGTTGTAGTTGTACCGCTGTTGGTTTAGTCGGCTAGAGAATACTCGATCTATCGGTTTCACACGCGCACGAGTCACACTCACGCTTCAATTCGTGGAAGTTGTGACAACCCCCATACAGTACCAAGCTCAATATGTGTGCGTAGACGGGCTTGGATGGCTTTCGATTAAGCAAGGTATCCTGGTGGACAGTGCAAGCTCTACATAGCGGATTAAGGTAATGCATCAATGCACACTTCGTTGTGACCTTTGCCTCAGCGGAGTATCCTGTTATACCTCGCACTTTCACTCGAAAGTTGCTGGGTGGCGACATGTCACATAGCTATATTCGGATACTTGTACGACGAATTCATCCGCCCGTCCGTACCAGATCGACGGAATTCTCAGTATTTACAGATAGCCATGTGTTATTCTAGCCGGCCGAAACATGCAGGTTCCATCCTTGCTTGACAGTATATTTTGAACAGATTCCACGATCATGGGTAATCTAGGGGGTATCGCATTGCCGCCGAACCCTATCGGGTGGGAGGCCGTAGCAATGTGTGAGTCTCGCCTCGCATTGCTTCATGAAAAAACCGGAAAAAGATCAGATATCGGCCTAAGTATTCGACCACCTACAAAAGCTGGGCACAGAACGGTCGGATGCTTGCTGGGTCGGTGTGATTAGCCAACAGATGGATGCTTCTCATGCTAATCCGAGACCCGATGAGGCTGTCGATATAGGACTATTGACGTGGGGGCATGCCAACAGGATGCATCTAGGTTTGGTTGCAAAGGACACGAGCAAGGGCCTGAGGCGGCCGCGTCGCATGCAAGAGGGGTGATGGATCGAACGGACCCGTCGATGCCTCGAGCCCATCAGGGAGGTTGGATCACCAAGTGATAGGCTTGGCGCTATAAAGATATAATACCGCGTAACAGGAGGGACGGACCCCAAAAACTCGACTCCCGGTGTCGCGTCTCATTATTTTCGGAGTGGATTTAACTGGCGACGAATTAGCAGCCAGGTGGCGCGCCTATAGAGCATTAAGACAGGAGATATCGGCTTATAACGGTTAGTTCCGGTCTGCGGGTCGTCTGGTGTGAGCCATATATAAGTTTATCGGTCGATCAACCCGGCGGCGTACGTAGCATTTGTTGCTTGGTATCCCCTTAATCTTCGACGTAGCGAAATGTCCCATAACCGCTGTGGGGCCTTATTTCGTAGAGTACTATTGGTACCTCTTTACACTTCGGACTAATTGTTTTCTCAAACGGCACCCAAGTCGGACGGATTTAACGTAGACGCAGACACCCACAGCTGCCGTCCTTCTGGCCTGCCTTCTATTAGCGGTAAGCAACCGTTCGCATCACTTGTGGCGTTTTCCGGGATGCGTTACTTATAGGTACCCCACGTACAATGGGCCGTCTTGGGTCCGTAGAGACGCAACACAAGGTACCTTCAGAATTTGTGAAGGCACTAGAATTGCATGGCCTTTGCAGAGCGGACAACCTCCGTTCTCCACAGCAAACGCGGTCGGGTTTTTAAGTGTTCTTCGGATATCGTCATATCTCCCCTCTGGCAACTGGGAGTATACGGCTCTTTCGCAATGCCCTTTTTCTGACATTTAATTAATGAGTATCTGTTTTAGCCTTCAATGGTTTATATTGGCCGCCCCGTCCGGCTCTATGGTCTGCCATATCAATTACCCGTTATAGAAAGTTGGCTAGGCACCGGAACGACACAACCCCGCTTCTACCAAATTAGTGGCGCCTGGGAGGTCACTGAGTCTCGGTCTTTATTGTGGGCCGTAAATGGTGCCTCAACCAAATCCCTTGATACATGTGGGTACCTGGGTCCATTGAGGGATAGTGAATGTTGTATTATTTCACGTTGGGAAGTAGTGGTACGCTTATAAATCTTCACTGGTGGCACGGTATAATATAATACGGGACGGCTCGGTAGTGCCTGCTCTATACGGAGCTTTTAAGAGGAATCGGGGCCAAACTTTTATATCTGCGTGGTAAGTTAATTATCGCTCCTACCCGAGACGATCCTAGCCACCAGCGGTGTCGTTGCCCGATTCTACTAGGGGTAGCATATAAATCCAGCGTCTGTTCGGATTTGACGCATGCAATTCCCCGCCGCACACGTCCTTCCTTAACCTTTCTGTGCCTATTATCCTTAAATCAACTGTCGCTTAGTGTATTCGCAGAAACAAAACAGATTTCCTTACCTTATGATTCTCAATCATAAGTATTGAATATCTCTGGTGTTAGAATGACGATTCGCAGTGAATTAGAGCGGTAGTCCCCCGTGCAGGATACTTTCGTTACCGGGAGTACGATAACCAGACGACTTAGTAGGGTTATTTGAAAATACGGCCGGACCAAGCAATCGGTAAGAATTAAAGGATGTGTGAGAGACTTCTTCCTATTTAGGGAGTGGGAGTCTATATAATTTATCCTCCCGCCTTGCAAACGGCACCCGCGCCTGTCCAACTTCCGCCTTGCTCTGCGACTTGTAAGGATACTAACACGCGCCGTTCAACCTATCCTCCCGTATAGCCGATCGCGTTATGGAAATGACATCCCAGGGATTGCAAATAGGTTTGTCACAGCCCTCTGGTGCGATATTAAATTTGAGCAGTCAGTCAGGGAGCGGGTGCGCCCGTATTACACATGTTATCTTACGGTGATCGAAACTGACACATTCATCAACGATGAAAGATTCGAAGATGTCCGTATGCGCTACTGTTCGGATGAAATAGTGTTAGGATTACATACAAGGTGGTCATGGACGCTGATGCACGAAACTTTCACTCCGATGGCAGGGATGTTGTTGAGTCGTCACGCACGTTCTCAGTGAAGGCTTCGAAAACATCCTGAGGCTCCAATGCTTACGTGAGCTCTCTCGCTCACGCCTCGTCAAGTGCGTGCGAATAACAGCCTGTCTAAGCTCGTAGGGACCTGCGATGCCGGTCGATCCGTAACCGCCACCCGCAATACACAGCTTGACGGCGGAACGGCTACCCCTGGAGACCATGGTAATGCCGGGACACCGTAACCATATGCCTGCGACCCCAGACCGTCGCAGCTATTGACACAAGTTAGTAGAGGATTTGATCCATGGAACGGGCCATTTTTTAAAGGGGAACGTGGTCGATTGTGAGACCTTATCTCTAGCTCTGGCGTAATCCTACTCCCTAAAATTATGTGACTCTTGTCGGCGTTCGCAGGTGGGCAAGATCGACCTGAAGTCCTCGATTGCTCACCGACGGCACCATACGCGTCAGTCACGCTCCTGTAGCGGCGCGTATCCTCTGTTCTTGAATTAGTTAGACACCTTTGAAACTGGCCTATCCACCCCTATTAATAATGGTAACTCACACTCGTGACAAGCGTCGCGCAGTCAGACCAATAGGGAGCACGAGATGTCGGGTTAGGATTTAGACTAGTATCGTTGAGTTCTAAGCAGACGCAGCATTTAATCGGAGGCACCATGCTTCGCCCGGTTAATGTCTAAATGATGTCACGGGCGTCATACCGAGCCATCACTGGACTTTATGCCTAGTAAAAAGATTGGCCACTTTTAATGACCCCATGCGATTTTGGACGCTTTTCAGCTCCCCAGGCATTTGGCCAGTGGATCTGACAATAGAATGTTACCCAAAAGGGTATTTTGTAATACGCTCACGCAATAAGCAATTCACATCACAGTTGAGCTCATGCTTGCTCTGACTTGAAAGGATCGCCGGATATTGTATCCTAACGATACCTCGGCTGATCACGCTCTTTCTTCTTTCTGCTAGGGACACCAAAATTGCAGATCGGCCTAAGGCAGACAAGGACTTACTGGAATATGCCTACTGTGTTCGCGCGGGGAAAGGGGTCACTCAGATGGCGATAGTGCGGTTTCAAAAATGTAAAATCCAATGCAGTTTGTTAATGCAAAGTATCTCTCCTATTTCTCCCCTGTCATACCCCGATCATACCAACCGGAAACTAAGGTTTGACGCCATAATCAAGATGAGTATGGCTGCGCCCCGAAAGTTCATGGTCGCTAATAATGTCGGATATTATTAGCCGGACCAAATCTTAGAAGTGCGCTCCATTGACTCCGGATTACTCCTTACGAGGGGCCTCCAAAATAAAACGCTGTGCACTGTTGTTTGGAGGCTGACCTTACTGGGTGACCTCCGACTGCAACTCGGCCTCGACCTCCGATAGACGGGGCGAGTAATGTCCTGTTTCGTGACTCAATCAAAAATAGGTTAATGTAAACGTGGTGGCAACTACTGAGTCCGAATCGATAACCTCGCGTGAGAATCAGCCAAGTGTTCTCGTATGCAGTTTTGTAGAGCCTACCACATAATCCTCAAGTATCGGTAAGAGGAACAGATTTTGGCATACGCAGGCGCGCGTTATGGGACTCGTGGTCTAGGGAATGTGCTCCCGTATAGGAACTCTCCTTGAATGCCGGTGCAATTGGGTGAACGAGACCGATTTGGTGGCAGATTTTAGTTTAAAGTTTAAGCAAAGGGTTCCCCGAGGCCGTGTCATAAGAACAGGTGTAGAACGAGTCCCCGGGCCCCCACCCCTTATAGTCAGATCCACTAACTGTGGTCAAAACCTGGTTACCACCCAGCACACACGAAAAGGGGACGTTTAGAAATTGGTTCGGCTCAGCCCGTCAGCGGTAGAGACGCGGGGTGAAGATGTGTCATACTCCATCCATCTGCGAGCCGATTCGGTGTGAAGGGGTGCGTAGACGACGCGCGAACCTAATTGGAGAAGAATAACGTTCACCGCGCACCGATTACGCTGAGACTGTCCGTCGCGATCTACTTAAGTTGCGGAGGAGAATAAATAACATAGAGTAACGTACTTCGCTCGGCTTCAGGGTCGGTAACTGATAAACAGGGCGGGATGCGGTGAGCTCTAAAAGTTGGTCGGTACGAAGCATCGTACACCTTTTGAAAGATTTACACTATCCATAAACTGGGTTTTGGTAGGACACTCGTTGTACGGCTACCCCACTTTGGCTAGAGGGAACGAATTCGGCTCCTCGGTGCTTCTTACGCTACGGATCAGAGGTCTCCATCAGCACGACGTTTTGACGCACTAGATGCGTGCGCTAAGGCGAGAAGACAGCGACACTTGACCACGGCTACCCGATCTTTTGTAATTGGGATGGCATCATCGTCCTAGTAAAGAGTCGAGTTGACGTGAAACCAAGGCCGCTAATAAGTAGGCGAATACCAAATGATCGGATGAATTCAGTCGGAAAGACTTCTTCGTGTGTTTGCAGTCCGAGGTATGCCCGGGAATAAGAGAACGTATGCCTTAGTCTCCAGTAAGGCTGCTCGCCCAATATTTCTGAACAGGACCGATCGAGCCTGGGCGGGTCGTAACGCCCTTTTTAACTCTGCTAGCGGCTTCTCGTACAAATAAACGGCTCCCGCGTCAGACTATTCCGGACATGATGCACTCCTAGGACAGTAAGTACCTCCCTGCTAAGTGACTATCATTCTCATGTTCACTGCTAACTCTACGTGGGCCAAAGTACTGTTATCCGACTACTGTGACCCTGACGGCCTCTGTTAGTATGATTGCACGCTCAAAGAGCACTCTAACTTAGCGCTCTGCGGGACTCCGGTTACAGTCTCCATAAGCCTACAGGCAGTCAGAA +9 TTGACGTTACACATCTCCAAGACTAACGGATCTCCTCGTCTGAAACGAACTCTGGGTGAGGTAAATCATTCAGAAAAAGGCCTCATACAGCAGGTTTGAAGTGCTCGCTCCAGCAGACACTGTGTTTGAGATAGGAGCAGTCGGGTTGCCTAATCAAGACTCCTCGTAACTAAACCCACAAATTGGGGAGAATCTTTCGGTAGTTCCAGCCGCCATTACTGAATATGTGCAGAAGAAACTTCTGAATTTTCAGAAGTACTCGTTAAAAGCGATTGTTTGTGCATCCTAACGACTTTGCCTGATGACTTCGTCAGATCCATGCCCAGTTTTCACCTGACGACCGGACTGTGTGAAACAATGCGGTCGGTCTACGGTACAAAACTATCTTTAGATTTTGAGTAAAGGGCTGAGACGGAGAGCGGGGAGTGGCGGACCGCATTTTATCAGACAACCCCGACGAGCCGCGCTAACCTAAGCAGATGGTGTTAGAGATGTCAGTAAGTTGTCATGATTCGCCTGCACAAGAGTACTGGTTGAATTAGCGAATTGTACATTGGACGATAGGTATGATTGGCAGAATCTGACGCGAACGGTCTGAGCGGGCAAGTCTCCAACTTGGAAGTGGTTAGCACGTCACTTGGCTCGATGCAAGGCGGGATCCTAGGCGTACGGTGCGATCTGCCCAAAACCGGCAAGACTCGTGTCCTGTAATTAGCGACCTATCCTCGTGGGGAAAGATTGCATGCACGTAAGACTGGCAGCCCCCTCCGGTACCCGTACCGACGGATATTCGCGGGGCGCAACGTTAATAGTACACCCTTAAATACTCGCTGTCGGGTTCACCAATGCTAGTGGAGTACAGTTTTAGCTTCTGGTCTTGCGAAGGTAAAGCGAATGTATCGTGGGGTTGATCCAATCCCCTCGTACCAATACGCTTCTTGTTTCCACTCCTACGCAGAGGCAACTCTTCCGGACGACACTAAAGTATCAAAATAGGCTCTAAGACTTTCTACTGGCACTGCCGACCGTTTGGCGAAGTCGTCAGGGTCTACACCGGGTCGAAAAGTTCGGTACGTGGATAGCGAAGGTACCCCATCCTTGGACACTAGTTGAGGTTTTGCTTCCCGTCTAGCTGACTCCCTGACACGGGAACTCAAAACGACCTCCTCGAAATTGCTAGTATAGATAGTTTGCTCAGAAAAGACGAACAGGCGCACCACTCTGAGCGCTGAGACTGGAAGTTCGAGCCTTTGGTCGTACGTGCTTAAATCGCGCAAAATGAGCCGTAGAACTAGTGAGTATTTCACCCGGCACACGACAGGCGACTCCCCTTAGCGGTGGTTCGGCCTGCCTTCCGTTGTCCGAAGATTCACTCACCTGTGTGATACATTGATCTCAGTCAGGTTGGAAGTAAGAAGGGGAACTTATCATGCTGGCCTGAAGACATTTGAACTCCACGTAACAAGTACTGGGTCCGCAGAGGTGAATGAGTGCTTAGAGGGCTACTCATTCGGGCGGGATCTATGTTACTAGCGCCGGATACTGATACGTCTGAGCGTCTAAAAGAATGCCAGTATGTTGACCGGGCCCGCTGTTTCAGGTGATGACACGTTGCAGATAAGTGTATGCGTCCAAGTATGAGTCAAGCTAGCGACGGGACAGAAAGAAAGACAGCCTTTGCGTGAACTAAGGTTAAATAGACAGATCCCTAAGTAGGGTTTTAAGTGGTGGGCGGGTAATAGGAGTTGGGAACATGTCCTTCATACAAGAAGCTCCTTAACCAGGGAGGGTCTCCGACCACAGTAGGTGCAGCACATCTGCTGAAATGTAGCACCCTTATGCAGTAAGGTTACTTTGTGGATTTTCTTCCGTCTGGGTTACTGGACCCGACACCGTCTTCCCGTTCTCGACAGCTAAAGCAGACATGAACTGTTCCTTGTAAGGAGATAGTAAGTTCCAAGCCCCCCGTGCGGGAAGGAATAACTGACGCTAACCCGATCAATAATTTGACGAATACAACAAGGTGAGGGTGCCAAAGCAATAACTGCACTTACTATCCACAGTCTCGCACGTTATTGGCTGCTGTCCGTATTCATGGGTTGTGATCGTAGTAACCTTTCCTGCTTTACGTCGAAAATTTACCCATGGAGGGAAATTGCAGACGCAGACACTCCAACATACCCCTCACTGTCGGCTCCTAGAAATGGGTACGCGCGTTCAAGGATAATCAGGTAACTGCTAGATTACCAGTCACTGCGCCTGATGCTTAACCAAGGTTTGAGATTGCCCCCCGGGTCTTTAAAAACGCCGGTCGCCATCTATCACCCTCGCCGCCACGTGAGCATACATTTCTAAACGGCTATGTGGTCAGAAGGTGGCATATGCTGGACACACTGCGCAGGCGAGCTTTCTTCAATACGATGGAAACGCCATATGAAAAACGAAGGCCGTACAGTATAGACCATCAGTCCTTGCTCACATTTAAAATTATAGATCCCCAGAGAAAGCGATAACTAAGCACACATCCTTCCGCTACGCGCAGATCGGATCGCAGTTAGATCGCTCTGACCCCGCAGCGCCGAGAGCCTCGGCCCGTTTGCTCTTAAGAGCCGGCAAAGTGCACACCGTTGCGCCATGGAAAATGGAATGGAATTCAGGGAAAGTTGTCTTCGTTTACAGCCACAGCCACTTGTGCGCGAGAAGGGGAATTGTTCCTTCTATGACACAGGCAACTGAGTACATTGCGCCTCTGAGCATTGTAACTCACGTAGTGCCCTCATCAATAAGCCGAATCTACCTAGTGTTAGTCCTCCGGAATTTGAGCCATGGGTAATTTAGCGCGATATGCCGAAGCCAAGCAGGGTGTGTATAGTCAAGGCTCAGGACTAACCACGAAGACCATTGGGAGTGATGGGTGATCCACGACGTTCTCAACGTTGGGGTGCATAACCTCCTACCCCGCGACGCGGGGTATCAGTGAACGAAAATTCGTAAGCTGACTTACCCTCGCCCGTCCTGTGATCTTCTCGATCCGTATAAGGAAGTCTGAGTTATGGACCCTTTGCTATTGTTGTTGTATGTTCATACATTCCAATTCTAGTGGTCAGCGTACTAGAGTGATATGCGCGTTCCCTAGGGCGGCTCAGATACCATACGGAAGTGTGCAAGAACGACTCAGGCTCTCTTCATGAGAGGCGTCCGACAGGTGAGACCCCAGGAACACTTTTACATGAAAGAAGCTTTAGGACCTACTGTGCGGGATCCATTTCACAGTTTGTGAGGCCGGTGGGCAAGTCCCTGTTAGTATAGAATCCCATCGGTCTTGCATGTGTCACGTACGGCTGGTGGCGACTGTGAGCGCGCCTCGTAGCTTACTACAGAGCACTTCGGGCTTGCTGTTGTTCTATTCCAACCTGACAGCTGACAAATTCTCGGGTGGCGAGTCCTCGCCGAGGGCCGGCAGCGCTTACGTGGCACGGGCGGGTAAAGCTTCCGGTGATGAGTGACCGGAACAGCGCCGCTGATTTCCGAACTTTTACCCGATCTTGGCCGTCTTAAATTCAGGTCCGGATGAGATTAAGTCGTGTTTCAGAAAAACCTACCCAATGGATATCACATCAACAGAACTTACCCCGACTCTGTAATTGATGCACGTCGACGAAATTTACGTGAGTAGGGAATACATCATCCAAAGTTGCAAGAGCGTGATGAGCCGGGTATAGCCTTCAGCCGGCGGTACATGCGTGTTTTGCGGTTGAAGGACTAGAAGTGTGCGTGGTTTCTCTGGGATTACGAAGGACGCTTCGAATCCTAGTCATTATTGCAGATGACAGTAGGACTCCACCACGTGTATAGCTTTGTTCGACACCAACTGGTACGCGACGTAAGTTAGGCACGTTGAACCGTCTGGGTACGTGCGTTTGATAACTAGGGATGGTCAAGGACGGCTAAGCCTAACGAGCCCTTATCCCCCCGACGATCGATTCCAACTATATGCACGAGCGTATCTTTGCGATCTAGCAGTAACAAGGCTATAAGGAAGTATGATATGGCTGGGCGATTGGATCCGTCTGAGACTTGGAACGGGCTCGCTGACGGCGACCCTATCAGATCCCTAATTGTCCAATGGTTCACGATATCAGGGTGATACGCTATACGCGATTCACTACGCGTAACGTTCCGTCCTCCGAACCGCACGTCAGGCGCACGTAACAAATTCCTAGGTCGTTAGCCTTTATAATGTCGGGCATATTGTCGCGGCCTGTTATGTGCGCCGATACAAAGATGGATTGCTCGGGTGTGCTTTCGTCGCGAACTGGATGCATAGACGTCGCTATAGTTCAGCGGCTTTTGGTCGAGTCGGCGAACGAGTAATCGCTCCAACGGACTTACACGCGTACGCGTCACACTCGCGCGTCAATGTCAGGGAGTCGATATCACCACCGTGCGCCACAATCAGCAATAAGCGTGCGTTCACGAACTAGGTTCGATATGTATTAAGACCTGTACCTTAGTCGATGATCTAAGTACAAATGCGCGGACCAGGGTAACGCATAAATGTACTCATCGTTATGACGTTTGCATGAGGGGTGTATCCTATGACTCCGGGTTATTAAACCCGATAGTCGCGGGGAGGCGGCTCGTTGTCAAGCGATTTTCCGATACTTGTACGTCTAATTGCTTCCTCCGTACGTACCAACACAAAGAAATTCTTGCGCCCTACCCTTGGCCGCCTGTTAATCTTCCCGGACAAAACATGCGCGTCCCATTCGTGCATGACATTATATGTGGAGCAGTTTCCACGAACCTCGATGATCTAGGGGCTATCGCCGTACCTGTGGAAGCCATCGGATGTGAGGCCGTGGCAATGTGTCAGTCACGCCCCTCCTTGCTACATTCTAAGACAGGAACGCGATCAGATATGGGTCTAAGTATTAAAGGAGCTAAGAACACAGTGCACAGAACGGTCTCATGCTGGCTGGATGGTTATGACTACCCCACAGACGTGGGCAAATCCTACAAAGCCGAAACCCAATAAGGTTGTCGATTTAAGAGTATCTTCGTCAAATCTTGGCAATAGGATGCATCTAGGTTTGGTGGCAAACGACTTTGGGCGGGTCCTACCGCTCACGCGTCGCTTGGGATCCGGGTGATCGATCATACGGCCCTGTCCTTGCTCTAAGCGCATCTGGGAAAATGAATCATAAAGGGACAGATATCGAGATATAAAGATATGACACAGCACAAGAGGGGAGAAGGACCCGATAATCACGTCGCCCGGTATTGCGGATCATTACTTTTGCAGTGGATTTAAAGGCCTACGAGTTCACAACCCGGTCGCGTCGCTGTCCAGTATAACGGCTGAAGAGATCGGCTTGAGATTGTTGGCGTCGATCTACAAGTAGGCTTATGTGTCGCGTAGATGGGTTTGGGTGTCGATCTACCGAGTGGCGTAGCTAGCATTTCCTGCTTGGTATACCCAAACAGCTTAACGTTGTGGAATGTACGATTACCGATTTGGGCCCGCATATCCCATAGTACTATTTGTGTCCCGTTACACTTCGTGTTGATTCTGCTCTGATTCCGGCGCTTAGTTGGAAGGAATTAAGAGTGACCGTCAATCCCCACATGGCGTACCTACACGCCTGCCTCCTTTTTGAGATAAGCAAATGTTCCCTGTACGTACGGCGTTTTCCGGCATGGGTTAAGGATATGTAGCGCACGTGCAGTGGGCTGTCTTGGCGCCAGAAAGACGAAACGCATGGCGGCATCAGACTCTGTGCATCCACAAGTATTTATCGCCCTGTACAAAGCGCACTCCCTCCACTCACCACAGCGGACGCGATCGCATCTTTAAGCGGTCTTTGGTTACCATTATATCTCATCGGTGGGAACTGCGAGAATGCGAAGCTTACACCATGCTCTTATTCTGAGTTTTAATTTATGGGCAGCTGATTTCACGTTCAATCGTATATATTGGGGGTCCCGTCGGGTTCTATGGTCTCTCATCTTAATTGCACGTTATACATAGTCAGCTAGGGACCGGAAGGACACAACCCACGGTTTAAGGACGTAGTGCGGCCCGGGAGGTCAGTGAGTCCGATTCTTTATTGTGTGCTGTAAACGGTGCAACAACCTTATCCCTTGTTAAATGTGCGCACCTGGGTCCCTTGGAGTATAGTGACTGTTCTACTAAGTCCCCACGGGAAGGAGTTGTACACTTAACACTCCTCCGTGGAAGCCTAGTCTCATCGCTTACGTGACTGCTCGGTAGACCACGACGTGTACTGAGGTATTATATGGCCTCGGGGTCGTCCATTTATATGCACGTTTTAAGGCAATTGGGGGTCCAGGACGACTCGCTCCTACCAACGAGTTGTGTCATTCCAAGATTTTGTCAGGGGTAGTACATACTACCCGCGTCCGTTCTGATTAGACCGATGCTATTCCCCGTCACATAGGCCGTACCTAAACCCCTCTGTGCATTTCAACTTCAAATCAAATCTAGTTTGGCGTATTCGCAGAAACAAATGATGATCTCCAACCTTGTGGTTCCCAATCATGGTTAATGCACATACCTGACGTTAGAATGACAATTCGCAATTACTTACAGCGCTGCTCCAACGCGCTGCTAGCATTCGTTACCGGGCGGTCGAAATTCAGACGACTTAGAAGGGTTATTTGGTGGTCCGGCGCGAAGAAAATATCGGTATGGGGTAAGGGATGTGTGGGTGACTACTTACTATAGAAGGATTGAGAATATTAATAACTTATGCTCCCTCCTGGCAAACGACAGCCGCGCCTGTCCCACATCCTGCGCGCTCTTGTTATTATAAAATTAAAAACACGCGTCGGGATACCCAGCTTCCGTTTTCGCCAAACATTGTATGGTCATGAGCTCTCTGGGATTGAAAAAAGGTTTTCCACAGGGATCTGCTCCGTTATTACGATATAGCATTCAGTCCGTGAGTGAGTGATCCCGTGTGACACACCATGTCATCCTGTAAACGAAAACGACACATTCATCCACGACGACCGCCTCGAAGATATGCTCATACGCTCTAGTTAGGGTGTAAAAGAGTGAGGAATACTTAGAAAGGGGTCATGTACGGTGAAGCACGCAACGATCACTCCAATGTCGGGGATGTTGTAAAGTCGGCACGCCCTTTCTCTGTCCAGCCCGAGCAATCAACCTGTGGATCGAAGAAAGCCGTCAGCCATCTTGTTCACGCTTTGTCCAGGGTGTGCTAATCAGAGCCTCAGTAACCTGCTAGGGCCGTACGATTCGCTGCGATACGGACCCGACCGCCACAACTCACAACTTGACGGAGGCACGTCTGCACAGGGGAACGCTGGGAATGACGGGAACCATTGTCCATATGCTGCCGACCGCCCATAATCACAGACATTGCCAAAAGTGAGGATCGGATTAGATCCCTGGAACGTGGCAATTATGCGACGGTACGGCGGTAGAACGTGTGACGTTCTTTCTTGCGCTATCGGAATCCTATTCCCTAACATTAAATGATTCCTGTCGGAGTTCACAGGTGGTCAAGACCGACCTCGGGACCAACTTAACTTAGCGAGGGCGCGATATGCGTCATTCAAGCAGGTGTAGGGGTCAGGATCCTCCGGACATGTGCTGGCGTGCCGCCCTCTAAGATGCTGTTTCCACAGCTTTTATTTATGGTAGGGCACAATTCTCACATCCGTCCCGCAGTTTGTCAATTTGGGATGCAGTGATGGCGTGTGGGCATGATTAGTAGAACCCTTAAGTGTGATGCAGTCGCGGCAGTTAATGCGAGAGTCCATGCCTAGCCACGCTCCTCACTATATTGTGTCTCGCGCGCCATACGTGGTCATCAATCAACTTCATTCCTTGGAATAAGGTGTGCCATTTTTAAATGCCCCTGGCCTTTGTGGACACCTGTCAGCTCCCCTGGCATCAAGCAAATGGTTCTGTCAATATAATGGATCCCCACAGTGTATGTTACTATAGTGACACGCACTAAGCAATTCACCTCACCGTTGAGCTTATGCGCCCACTCAATCGCAAGGCTCGACTCCGATTGCATCTCAACGATACTTAGTCTGAGCTCCCAGGTTCGGTATTTTGCTAGGGATAACAAAAGTGCAGATCGCACTAGGGGAGACATGGTCACACTGGAATCAGGGTACCTTATTCTCCCGGGGAAATGCGCCGATAAGATGGCGATCCTGCTGTCTCAGACATGGCGAACCCGTTGCAGCAGATTAATGCTAAGTATCACATCGATGAATGCCCCCTCTTCTACCGGTCATCCCAAAGCGAGGCGAACTGTTGCTTACCCCGGCAATCTCAATAAAGCTGGCCCCCGATATTTCATTGTCCCTAATTATGACGTGTATTATTTACCGGACCCAATGAGTCAAGTCGGGTCCGCGGACTCCGGAATACTTCTTACCAGAGACGGTAAATATACAAAGCTGCGTACTGTTAGTTGGAGTCTGAAACTCCTGGGTATCCTATCCCTGAAACTCCTGCTCAATCCCCAACAGACGTGTCGACTAATGTCTTGTGTTCTACGTCAAACACAAATAGGTTACCAAAAAGCTGATCTTAACTACGGCCTTAGAATTGATGATACCAAGCGTAATTCAACGATGGGGTCGCTTATACTGCTTTGTATACCCTAGTTGAGATGTCTATCTTAAGAATGCGTTGAACCTAAATTGGCATCCTCAGGCGCGCGGTAAGGGTGTCGTGTACGCGCGCATATCTTGCCGTATAGTAACCCTATCTGAATAACCGTACAATTGCGCGATCGCCAGCTATTTTGTAGCAGACTTTCTTAGAGCGGTATAGCGAAGCCGTACGCCGCGATGTTTCACAAGAACAGGTATGGATCGAGTCCCCGGCCCCCGACCCCCCGTCATATAATGCGTTTGCTGTGGACATTACTTGATTACCACCCTGTACACGGGAAAAGTGGATGCTAAGAACTTGGTACGACTCAAAGCGTCATATCTAGGTATGCGGCGAGAAGACGATTCGTACTCCCTGCAACTGCGGGGCGAATGGGCGGTATGGGGTGAGTCGTCGACGCGCTAACCATAATGGAGTTTAGTGACAGGCACCGTCCCGCAAATCCGCTGACAGTGTTCGCCACGATAAACATAAGTCGCGGGGGCGAAGAAATAAATCAAATTAACATAGTTCGCTCCGATTCTCGGTGGTCATCCGATAAAATGGGCGGCCCAAAAAAAGGTATGGAAGTTTCTCCGCGCTACTTATCGGACAGTATTTGACAGATTGACACTTCCCAGCTTCTCGGGTTTGGTAGGATACTCGTCGTACGCATACCCAACTGTGCATCCCGGGTATGAATATAGGTCAGCTATGTCACTTAGTATTCGGACCACGCGTCGCCCTCGTCACAAAGTTTTGACGTAGCATATCGGGGCGCTACGGCGAGCAGACTTCGGCGCTTAACCACGGCAGCCTGATGTTTTGTACGGGCGATTGCACAGTCGTCCTAATAGCGCGTCAAGTCGACGCTAAAACAAGTACACTAACAGGTACGTGAAGACGGAATGTTCTGATGAATTAAGCCTGATAAACATTATCGTTTCTTTCGAGTCCCAGTTTTCCCCTTGAAGCAGAGAACGTGTATGTGAGTCTCTAGTAAGTCCCCTCGTCCAATATTTCGGAGCAGTTGCGGTCGCGCGTAGGCGGGTCGTAGCACCATCTTTAACCCACTAAGCCCGTCATCAAACAATTAAGCGAAGACCCCGTCAGACCCTTACGGTTATGAAACACTCCTTGGACATCAAGTACTTGCCAGCTAGCCGACTATCCTTCACATCTTCACTGTTTACTCTTGTTGGACAAAATATGAGTTAAGCAACTAATGTAACGCGATCGGCTCACGTAACAATGCTAGCAGGTTCACAGCGTAATAGAACTTACCGCTCTGCGAGTGTCCAACGGGAGTCTCCGTTAGCCTACAGGCCAGTACCA +10 ATTGCTCGAACCGCGCGAGTGACTCTTACGTCAGACTTCCTAATCCATACTAGCCTATTGAAAATTGCAGTGCGTTGAGCAAAAGGTCTCTACGAGGCCGTGTGTCCTATAGGGTAGGTTAAGCTAGACCTTCCGTCGCCGTTATCCTACATGATGTTCTTTAGAGAGGGATCCGGGTTTACATGGACATACTCAATATGCTATCGCGGCGTCTCAGGGCCGAGTTTTGCGAGAGAATAACACGTATCGCAGAGAAACGAAGTACTATTTTGCTGAGGAACGGCCACTGTGTGTTGTTGTCAACAAGGAGATTACGAACGGCATAGGCCAGTATATGTCAGCCAGTTGATAAATTGTCCTCTGCTGGCTGTATTGTAGCAGAGCACTAAAGAGCACGATCACATATGCCTGGGTCGGACCTTGGGCCATTTATGCGGGATTTCAGAACAATAGGCACGTCTACACGATGTCGGAGGTTTAAAGGGATACTTTAGGTCGCAGGGTCTTCCCGACTTAGATATTCCCGGATAGGCAGGCGTTACGCAATGCGTATATATGGGGTTATACACCTGTGGGCTGCATGCAGCTAACTAACCTAACCGTATTCAATAAGAGCATACGAAGTTGACGCCACAGCGGGCCGGAATTACGGCTAACTTTACGACTGTAACGCGATCGTCTCTGCGCGGGACCTCTGGCTTCCGCCAAGTGAAACGCTCCCCAGCCAATTTATCTCCCAGCCCTGACAGCCAGGCTCTTGTAGCCGGTCTACGCGTTCTTCCTCGAGGAACAGGCAGGAATGTACAAGCAAATTACACTTATATCATGCGGTTCGTCAAGCATGTGATCCGGACAGATTTGAACAAGAGCCGACTCTTACTTCCCTTTCAAACCGAGACTATGACTTCCAGATGAATGCCTGGTGGCCATAGTATTGGTCTCACTAGTCGTGGAAGGAGGGACACGTTGTGGAAGACCTTTTGGTAAGTAACACTATCATCGCATAGCAACCACCTCACTTACCTAAGTGGCATGCTGAGGCCAGCAGTGCCCCTAGCCTAAGTATCATTCGAGCGTCACTTTAACCCGAAATACACGACTACCCTAACTACTTTACTAGCTATATAATTGGTAGTCTTGGACTGTTATAGTGGGCACGAGAGGCCTAAATCAGGTACTGATTAAGAAGCAAGGTGCCCGCGCGTGCAATTGCGGTCCATAGGAAACGTCGGTTCGCCCGACCGGAAGTACCCGACGTCTTGGATAATTCTAGTCTGACCAATACGATAATTAGCCTCAGTGCCCTCTGGGACTTGAACGAGCTGCCGCCTTCACGGCCGGCGAGCCTTGGCTCCCAGGTAAATCATGACATCCTTGGGGTATTGCAGGACGGCTTCGCCCATGGCCCGGCTCGACCGGACCCCTTAACACTGGGACGACTTGACAGTGGCCGTCGAAACAACTACCTCATTGGGGATGATGACTCACGATGGTCTTCCAGACCCAGGTCTAGGAAAGGCTCTAGGTCAGAGAGATCCAGCCATTCTCTTCAAGAACCACTTACCCAACAGGCTTTCGAAGCACTACCGCTTTTCGGCCGATTGGGGCCACATGTGTGTCTTACGGACGCACGAAACCAGGGGGTATACCATTCTCGGGGCGATATGGTCGGACGGTTTGCGCGGCTTTACGGACCATAGAGCGCGCTTGCACGTACCTATTTAGGAATTCCGTGCAAGCTTAAAGAAGGTCGGTTTGTAAGCAGATAGCAGTAGATCGTGCTTCGTGAGACTTTTTGACAAAGTCATAGGCATTGGGCTCTGCCAAAACCTAGTGGCACTCTTGGCCGAACCAGGGGCCGCGATTCTCAGGGCATCGCTTCGATGAATGATGGGATTTGCTATACCGGCAACCACGTAAAGTGTGTAGGTCACAGGAAGAGCCAACAGTGAGCCGGCGTCTAAGTTTAGACTGGTAAAGGACGCCAGATCCTAGTGCTGCAACGCGATTCTTGCGTGGCTCTGTTAGTGCCTCGAAAGGCGTAACTGACATTGTCGATCACGGGTCGCGCCGGTGTGACGATCTTAGCCGACTTCTAAGAACGCAGGTCTGCCCATACAGACTGTCAAATGTTCTAGAGTGTACGGGAATTAGGCACTAGGCTAAACTTTAAGCAACGACCCGTGGGTGAGCATCGGCCTGGAGAACCCCCGTCAGCAAGGGCGTCGGAGATATGGGATGAAGGCTATGCAACACTCCAGGATAGCAGCGTACGCTCCGTGTTTGAAAGTATTGCAAGTAGCACAAAGACCTCATACAGCTTGTGACGCATCGTACGGGGTGGCCTGCGCGGGGGAGTGCGATTTACCACGTCATTGTCTGCCCAGGTCAGTAGTCCCCTTAATTTTACCGGGGGGACGCACAACAATTGCCGCATAACGAAACATCTCGGCATGAGTAGGGATAAGCCTCAACCTAGTGTAAGCACAGCGCCCCGAGCCTCGAGGAAGTGATTTTCTTTGTTTGGCACATATAACAGTACTAGCAGGCTATGCGTGTACATTTGTCCATCCTGTCCGTCCAAGGAGATGAATCCATTCACTCTTTTTCCGTATTGCCCCCACAACTGTTCTTAGATTCAAATGGAGCTTAGCGTAAGATAGTCGAACATGTATGTCCAAAGTCTTGGGAGGCCTGCGCAGCGTTGTGACAATACCGATGGCCACTAAGGTGTTACACATTACTGGCTATAAGGATATAAATCCGGTCGACATAAGACTTACGTTCAGTTGTCTCTTTATGGTGGGCCGTCGTTGGCGCCATTGAATCTTGAATTACCATTGTGACTTATCGGGACTCGTATGGGTATGGATTCGCCAGAGCCCGTATCGCGTGGTATAGGATAGTCAAGAGGCAAAATTTTTTGCACTTGTGTTAGCGTTCATCTGTGCGTCCCGGAATCTATGGAATTGACTACCGAGTAAAGTGGAGTAACGCCGGGAAAAGGCCGTAGAAATCCCACGCTAGGCCAAAACTAACCCGGAACAAACGGCATCCTTCATATTAGCAACTTGTCTAGTATAATGGAGGCTTTCAGATAAAGCCAATCTTCTGAGTATCGTGACCCCCCGATAGGCTGGTCGTGTCAGGGTCTAGGCGCCGTCACATTGCCGCTGTGAGGCTCGCCACTCGGTTCAATTTTAAAATGTGCTAAGCCTCTAACGACCGGGGCCACGAGCTTTGCTAGTTCACCTATGGGTGTTTAGGCGACGCCAGCCGCGGATTTCTGACAATAAAGGTTATCAACTTAACAGGAGGTGACGATCAGATGCACTCGCAGGTCCTGTCTGAGGTGAGAGGTCTTACGTACATAATGAGTCGTCTAAACGACGAGGTAGCATGGCAATTAAGCATTAGCGGAGGCGCGGTGGGCTTGCTACAAAACACGAGTTCGGGGGCGAGGCGTACCTGTTCTAAATCCGCAGTGAACTATGACATGTCACGCTGCCCTGGTCAAGTGTACCGAGTGACTTAGCCTCGCTTCGTACCTTTTAGTGTTTCATCAACTTCACAGGGCCTGATCCGTGACTTACCCCAAAGGGCCCTGGCCTTTATCCACATTGCGAGACATGCCGTTGCGGCCAACTAATATCTACCTTTATTAAATTGGGGGGCTGTTCCTTAAAGAAGATTCGAGTTGCCTCAGACTCCTCGGACGTGCTCGAACGTTCAAGTCATGGTGGAACCCGCCCTGGAAGACAGGTCCAAAATCATTTGCTGGTACAATTATGAGCGGTCAATGTGCTAGAATTAACGGGTTAACCGTTGTTCTGTACTGTGAATTGCATAGCGATCTGGGTAGCTAGTATTTCTGCACCCCCATCCAATCGCGGTGTGAAAGATCCACGGATGATACTCTCGCGGCCTCGCTACCCCTCAGAAATGAGCCCCGCTTAGGGTAATGAACCAACAATATTCACTGGGGAAGCTCATGGGGACAGGGGTACAAGGCACCTGGCTGAGCAATTACCCAGGATTGAAGCGCGGTCCTAGTTGGAGGAAGACCGCACCCGGGTAGCAGTCAGTCACCCTATCCTCCGGAAGAGGATGGGGCGTAACCTTATCTCTACAGTTCGCCGCAATCACTTGTCAAGCAGGTAGCCATTTAGTACCATCGTGCCATCATCGTGATCTGCGTCTGGGGCTGCGCGTGCTGCCCGTGGTCATAAGAAGTGAGTCCTTCCTCTTGAACTTCCGCGGATGTGCCCCAGAGGTAGGAGACACATACAACATTGCTCTCGACACAATGTGATTGTTAAACGACCACTGAGACCCCATTTCTGTGTTTGCCGTTTACAACCCAGTCAATTTTAATACCTTAGACGGCTTGTATTATTCCGACAGCATAGTGTGTTATGCGTATGCTTATACAGAAGACGCAACGTGGTGGAGGTTACACTATAGCCAGCCCCCAGCATGGGGGGCCGTTATATGGCTTCAGTTAACAACAAAGTTACTGTGCCTCCAGGGGACTAGTTCTCCGACTTGAGATATGATACACCTACCCCAACCATGAATAACACACTCATAGGCCAGAAGGCTGCTTACCGGCAGATAGTGGTAAATTGATCGATGATGAGTGTGGGATTATTAACTGACAATTGCTAGGCGCCGCTGCTTCAAGGATCCGCGAGGTTGCACGTCCGTAGGGTTGCTAGATGGAGCCGTGATTACAAGGGAAAATGTGACCGCGCCCATCCTAGACCAGGATCTGGAGGGTAACCGACCTACCCAGAAGACGTATGACCTCACATCCCCTTGATCTGTGCACAACTTGGTTGCAGGTATGTTCATCTCCGGGGATTATTGCCATCAGGCTACGCGGTATTTCCGAAGACGCGTTGAAGATACAGTGCATGACATGCCACCAAGCCTTTGGAGGCATCAGTGGCCATCGATTCTATGGTTTAACTGACGCGGTCAATGCAGACATTAGTGAAGGCGTTCTGGCTGGCCGTTAATATCGGTTCGGGGATTGTCGTCCTCGTTTTGCGGGTTTGACTCCAAATGGTATAGAAATCGGTCACGGCAGTTTCTAAAAATTACTCGGAGCATTTTTGTTTGCTGTGAACGCGGGCCTTGGGGGATGGCCGAGTCGTAAGGGACACACCCCGAATACCTAAGTGAAAGCGTGTGTCCCTATTAATGGACTAGCGCATGGGTCGACTCCCAACCGAAGTAGGACTCCAAAAACGCGATATTAGGCGGTCTCGTCAGGACTTCATGGCGAGATGCTGAGCCGTATCCTATTTTGTACTCTTTGCGGATTAAGGAAAGACCCCGTTTCATAGATAACTAGTCGGATCAAAAGAGATTCTTTGCGTCAGGCGTAAATGAAGCATAATCCAATATTTTGCTCTTCTTGCCATTCCTGCCTTCTCGATCGCATCCCTGTCCAATGAATCTCCCCACTCCATTGCCGACTTTCGGTCACGGTCTCCCGCCGTAAGTCGTTAGAACACATTGTGAGCAGTAACGTAGTCTTGTCCGTGAGACGTAAAATACTGATAATACGTTGGTTGGGAGGACTGTTCGGGTCCAGCTACAAGGGCCTTCAGCCCGTTTTCGATGGCTTCCGATGTATGTTACAGTGACCGACATATCCTCGATTGTATTGCGGGACACGTACATCAAACAGTAGACTTTTAACATAAAGACACAAAGCTATAACCGTGTTTTGACGGTACATGGACACTCACCAGCGATCTTCGAGGCGTAGCATGGCATTCATGGAGGACGCTCTACATGTATGATTGGGCGCGTCCAAAAGTGGATCGCATAAAATCTCCGGGTCGAGGGTTTGGGGGGTACTCTCTATAACAAGGAGATTTAAAATACGCGTGACCATACTGCAGTACGGCCCAAAAGGAAATGCGTTCCCGGGTACAGCTTTTTGTTGCGCCGGTTTGATGATATTGTGGCAGATAATCCACCTCGGGGGTAGGCTGTTCTTGGATGCTGTATGAGATGTCATGGCCGCATACCACCGGAGCAACGTGGCGAGTACTCGACCTAGGCCTTCCCACCTGAGAGCATGAGCTGCTGCGATATTCGAGTTGTCTCAGTGATTTACTTATCTGATTCGCGGCCCCACTGACGGCCAGACCACCTCCGCCCCCTGGAATAAAACGCGTCCGGGAGAGACTCCACTCATGCAATGAATTGGTTTCAAAGTGCAGCCCAGTGCGTCCATTGGGACGTTGGTTGCATGCACCGGAATGATGAATACCCCGGTAAAGGCGTGGCGTTCTATGTCTTAGTTTGCTCCGGAGATCTGCAACATTAGTTCCTACCTACATACAGACTATTGCATCTGTAGCAGGAGAACACGGGTCTTCGCGGACCATTTTTGTCAATGTTCAACAACTTAGAACTAAAGTTTGGGCCTGCCATATCCGCCCCTAGTTGCGTGGTTTTATGTCTATACGGATCCCAACCCGTGCTTAAATAGGTTATATGAAGGCTATCGCTTTAATAACGTGTTAGGACGATCCCGCCTCGCAGTTCGAAATGCCTGCTCTCCCATTCAATAATTGCCTTAAGAAACGTGGTTGCACTGGTCTCATCAGACTACTGGACTGAGTACTGATGCAGACCGGCTACGTTTAGACGGTCTCGCCATGTACGCAGTGGCGTGATTGAATACCCCCCCGACGATATCTGATCGCACCCGAACACCCAAGTGAAGCAAAGCACACGGGTCAGTACCATCCTGCAACTCCGTAAATAAGGGTTATCGGGGCCGCGTACGGCCCCCATGATTTATTCTCGATTAGTTGAAAGTTTCTCTTATGTCCGGGCCTCGAGTGTCCTCAGTACAGCAAAGGCACTTATCGGCTTTCTCCTTCAGTTCAAGTCAAGCTTTATCACTCAGGATCCGTGTTTCATATGTCATGTGGACAGTTCCAAACTGTGCGGCCTATGTCGGAGGTCAGGCGGTTTATACCGTGATATATCCTATCTCCGCTTGTATCAGCGTGCTTATTGACCTCAGTTTCGTACGAATCAAAGTTTCACGGTAATTACCTTCCGAATTTGGGCCAGATGATTCTTTGCTCCCACGACTGCATTGTTCCCGTGGACTTCTGAGATGTGACGTTCCAATCTTCAACGATACGGGGGGATATAAATGGCTAGTTCCTCAATAAGCTACCATTCAGGAATGTGTGTGTTAACGGTTCCCATCGGGATCCAGTCGAAGCGACCCCTCGTGGTGGATTGACATCGAACTGTAGCCTACAATGCAACCTGCTCGGCAGGGCCCGGGGTGCCAGCAATACGAGAACGTCTCCGGAAGTGGTCCGGGTACGTTTGCACCGGATAATCTTGATTCGCCGCAGCTGGCATATCGCAAGCTATCTGGCCCCGAGAGAGCCAAGCACCTATGTCGGTCATGAGTAGGCTGGACCTAGGCTAGTGTACTTGGGCAGTTGCACCGGTATTGCGGCTGGCTCGACATAACGCCGACTCCGCGATATCTGGTCAATCTAGTCATAGCGCCATTGTTCCTCGAGCTATGGCACTGCAATAGCAAAGTCACATCATGACGCTTCCGGCTGTTTTAGTTTCAGGTGTTATGTGGGGCTTCAAGTTGCGTGCGAGATAGATGAGGGGGTACCAGAAGCAAGTCAAACTACGGCAGGCGTGATGCTCAGAATTTGAGTGTGCAGTAGAGTAATATGGCCCGCACCGTTTCCTCCATGAGGTTCGTTATGGCTTACACTGTGTAAATTATAAACGAGCACCGGTTTTTGCTTGGCGGTTCTGCAATCTAAACCCTGCCTTGGAAAGTGGTTTAGTAGGGATGTGATTTCACGACTTCATAGTAGGTTACTCTCAGGTCTTACAACACTTGGCGGGCGGCTACAAGAACCTGTACCACCGTGATGGAGAGCCGCACCCATGTATTACACATTAGCAGGCCACTTTATGGCGAGTAGATGGTTCATACGTAGGGCGGACTGTTTCCGCCGTCTGCTCAGGAACCGGACATTGATTTTTACGTCTCCCCGCACAGGGGCGGGCAACTTAAGGGTTGCTAAGAAGAAAACACACGGGCGCAACGGGCAATACGTCCACTAGACGGCGATCGCGACTCGGACATGAGGAACCCGGGTAAGCCCATCTCAGTTAACTATAGGGTTGGAGCCTGAGGGGGTTAAGCGCATTCGGGTTCGTTTTCGTGGCAGTACTCCCGCAATACAACCTTTGGCAGCACAAGTCACGTTAAAAAAGACGGTCGAGGACCCAAGTAGTTAAAGTTTGGCGCCCTCTCGCCCTCTACAATGCTGCCGTTGGTAGCGTATTGCCCCACTTGTATGACACGCCATCTGCGCGTGTGGATGTCGGAGAGCTGAGAGCGCCTTTCTTGGGATGCGTACTAGAGCAGATGGAGCTAAAGCGCATGAAGTATACTAGTGTTCAGCGCACTCATCGCACCCGCCAAGAACTATCTCGTTATGGCACTGTACATAGGTGTACAGCTTTCCGTACAGAAGGGACAGGTGTAGACTTGGGTACGACGTCCTGCTGCTGGAGGACATACCCCCAGCGGGCCATCGGCTCTGTTAAAGCGAGCCAGTCACATCCAGAGGTCGATCCCATGAGCGTCTCCTGCGCTAGGTCCTCACTCTTAGAGACCTTCGGCAAGTAACTGAATCTAGCGGCAAGGCCCGTAGCTTTAGCGGACGAAAATGCAAAACCTGTCAATTATTACAATGACATGCCATAGACAATACTCGTGTCCCGAAGAGTACATCGGCGATGCGCCGAAGTTAAGCAAACCCCGCTGCTTAATCGGTTATCTCATACCAAGCCAAGATAAAGTGACGACCGATTTATAGCAACCAATCAGCAACCACGATACAACGAGAGGCAGTTGGAAAGCAGGCACCGCGTGTAAATTTTCGAATGCCCTGTAACACGAAAGGGTGTTCTCCTACATTCCTCCTGCCCCTCGGCGACAAACTTATCCCGACTATGCCAAGTCACACATCCAAAGGAGTTGTAACACAGTTATCCAGTCTGTTAGAGAGTCACCCCAGACATAGAAAGGCTCCGTCCGAAAATAAATGACGGGGAGTTATATCAATCACATCCTACCCGGGACTGGGTCTCGTCAGAGGTACACTCACGATTCACCCTTAGAATCGAACACTTAGGGGCCCAATACCGTACAGCTCGGTGATGTCTGGGTCAACATTACTCGTCGTCGCTATTTTTGTCCCGTTTCTCCCCTAAGGGTAGATAGCTTTATGGGGGCCTGAGCGATACCATGGACTGTGGACTCAGAGTTTCTATATCGCCATGCAAGACTCACGCTAGGTCATGCAATGCGCCCGCGAATAACATGCGAAGGTCCCTATTATAGTACCAGCACCTATCTGCAATGAGATGAATTCCGGAAGCGACAAACAGGTGCCCGGCTGCCGCGAAGGATTACGATGTCTCTAGTTTCCCGTCGGCCGTATTACACAGCTACCCTAACATTTCCCCTCCAGCCCAAGTTCGGGAGCTGCACCTTTTTTGACTTCATTCTCGCGTGAGCGAACATTCGAAGCAATTGCTATAAATTTTATATGGGTTCTGCGTTGTATCAAAGACTTTTACTTTTGAGTCTCTATATATCGTTAGTGCTCACCCAGAAAATACGGTAGCGTTATGTAGGAGTCAAGGGCAGTGGACTCGACCGATGTCCTAGGGTTTTCGACACAAGTACACGAATGGTGGGAGGACAACCAGTTATAGTGTGCTAAATTTTACTTCGCGAGTAAGAGTCGAATACACAGGCGGGCAAGCTCTAGGAAAGAAGTATTTGATGTAAAAACAGTTGAGTTTGCTGGACAGTACACGGTGGGGATCGACGCTCCCGCATACTCGCTTGGCACCCCTTCAGTATCTTCATCACTAAATCGGTGTCCGGGAATTAACAAAATAGTAG +; +end; + + diff --git a/JC+GAMMA/test3.py b/JC+GAMMA/test3.py new file mode 100644 index 0000000..6cded7e --- /dev/null +++ b/JC+GAMMA/test3.py @@ -0,0 +1,398 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### + + +import readSeq +import random +import re, os, itertools, sys, glob +from itertools import chain +from scipy.stats import gamma +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + def allocatePartial(self, patterns, rates): + if self.number > 0: + npatterns = len(patterns) +# print 'npat', npatterns + self.partial = [0.0]*(4*4*npatterns) +# print len(self.partial) + for i,pattern in enumerate(patterns.keys()): + base = pattern[self.number-1] + for l in range(4): + if base == 'A': + self.partial[i*16+l*4 + 0] = 1.0 + elif base == 'C': + self.partial[i*16+l*4 + 1] = 1.0 + elif base == 'G': + self.partial[i*16+l*4 + 2] = 1.0 + elif base == 'T': + self.partial[i*16+l*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: +# rt = [0.03338775, 0.25191592, 0.82026848, 2.89442785] +# rt = [2.89442785] + +# rt = [1.0, 1.0, 1.0, 1.0] + + npatterns = len(patterns) +# print 'patterns=', patterns + self.partial = [0.0]*(4*4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): +# print i, pattern, patterns.keys() + m_list = [] + num_pattern = patterns[pattern] +# print num_pattern + + for l,m in enumerate(rates): + + + psame = (0.25+0.75*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*m*(self.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*m*(self.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + pAA = psame*(self.lchild.partial[i*16+l*4 + 0]) + pAC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pAG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pAT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pAA2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pAC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pAG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pAT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + self.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild + + + ###################################################### + + pCA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pCC = psame*(self.lchild.partial[i*16+l*4 + 1]) + pCG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pCT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pCA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pCC2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pCG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pCT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + self.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### + # + pGA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pGC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pGG = psame*(self.lchild.partial[i*16+l*4 + 2]) + pGT = pdiff*(self.lchild.partial[i*16+l*4 + 3]) + + pGA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pGC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pGG2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pGT2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + self.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(self.lchild.partial[i*16+l*4 + 0]) + pTC = pdiff*(self.lchild.partial[i*16+l*4 + 1]) + pTG = pdiff*(self.lchild.partial[i*16+l*4 + 2]) + pTT = psame*(self.lchild.partial[i*16+l*4 + 3]) + + pTA2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 0]) + pTC2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 1]) + pTG2 = pdiff2*(self.lchild.rsib.partial[i*16+l*4 + 2]) + pTT2 = psame2*(self.lchild.rsib.partial[i*16+l*4 + 3]) + + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + self.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild + + site_like = (sum(self.partial[i*16:i*16+16]))*0.25*0.25 + site_log_like = (log(site_like))*num_pattern + like_list.append(site_log_like) + log_like = sum(like_list) + return log_like + + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) +# print script_dir + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick +a = treenewick() + + +def gammaRates(alpha): + bounds = [0.0, 0.25, 0.50, 0.75, 1.] + rates = [] + for i in range(4): +# print i + lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) + upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) + mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. + rates.append(mean_rate) + return rates + +def prepareTree(postorder, patterns, rates): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(nd.allocatePartial(patterns, rates)) + print 'log-likelihood of the topology =', likelihood_lists[-1] + + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24553 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + alpha = 0.5 ### gamma shape parameter rate categories + yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' + rates_list = gammaRates(alpha) + postorder = readnewick(yuletree) + result = prepareTree(postorder, readSeq.patterns(), rates_list) \ No newline at end of file diff --git a/JC+GAMMA/tree.tre b/JC+GAMMA/tree.tre new file mode 100644 index 0000000..2290a90 --- /dev/null +++ b/JC+GAMMA/tree.tre @@ -0,0 +1 @@ +(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112) \ No newline at end of file diff --git a/JC+GAMMA/treeLike.py b/JC+GAMMA/treeLike.py new file mode 100644 index 0000000..c3007fa --- /dev/null +++ b/JC+GAMMA/treeLike.py @@ -0,0 +1,367 @@ +################################## +# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree +# topology, and computes log-likelihood of the topology under Jukes Cantor model +################################### + + +import readSeq +import random +import re, os, itertools, sys, glob +from itertools import chain +from math import exp, log +class node(object): + def __init__(self, ndnum): # initialization function + self.rsib = None # right sibling + self.lchild = None # left child + self.par = None # parent node + self.number = ndnum # node number (internals negative, tips 0 or positive) + self.edgelen = 0.0 # branch length + self.descendants = set([ndnum]) # set containing descendant leaf set + self.partial = None # will have length 4*npatterns + + + def __str__(self): + # __str__ is a built-in function that is used by print to show an object + descendants_as_string = ','.join(['%d' % d for d in self.descendants]) + + lchildstr = 'None' + if self.lchild is not None: + lchildstr = '%d' % self.lchild.number + + rsibstr = 'None' + if self.rsib is not None: + rsibstr = '%d' % self.rsib.number + + parstr = 'None' + if self.par is not None: + parstr = '%d' % self.par.number + + return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) + +def allocatePartial(node, patterns): + if node.number > 0: + npatterns = len(patterns) + node.partial = [0.0]*(4*npatterns) + for i,pattern in enumerate(patterns.keys()): + base = pattern[node.number-1] + if base == 'A': + node.partial[i*4 + 0] = 1.0 + elif base == 'C': + node.partial[i*4 + 1] = 1.0 + elif base == 'G': + node.partial[i*4 + 2] = 1.0 + elif base == 'T': + node.partial[i*4 + 3] = 1.0 + else: + assert(False), 'oops, something went horribly wrong!' + + else: + npatterns = len(patterns) + node.partial = [0.0]*(4*npatterns) + like_list = [] + for i,pattern in enumerate(patterns.keys()): + psame = (0.25+0.75*exp(-4.0*(node.lchild.edgelen)/3.0)) + pdiff = (0.25-0.25*exp(-4.0*(node.lchild.edgelen)/3.0)) + + psame2 = (0.25+0.75*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) + pdiff2 = (0.25-0.25*exp(-4.0*(node.lchild.rsib.edgelen)/3.0)) + + num_pattern = patterns[pattern] + + pAA = psame*(node.lchild.partial[i*4 + 0]) + pAC = pdiff*(node.lchild.partial[i*4 + 1]) + pAG = pdiff*(node.lchild.partial[i*4 + 2]) + pAT = pdiff*(node.lchild.partial[i*4 + 3]) + + pAA2 = psame2*(node.lchild.rsib.partial[i*4 + 0]) + pAC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pAG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pAT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) + + pfromA_lchild = pAA+pAC+pAG+pAT + pfromA_rchild = pAA2+pAC2+pAG2+pAT2 + node.partial[i*4 + 0] = pfromA_lchild*pfromA_rchild + + ###################################################### + + pCA = pdiff*(node.lchild.partial[i*4 + 0]) + pCC = psame*(node.lchild.partial[i*4 + 1]) + pCG = pdiff*(node.lchild.partial[i*4 + 2]) + pCT = pdiff*(node.lchild.partial[i*4 + 3]) + + pCA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pCC2 = psame2*(node.lchild.rsib.partial[i*4 + 1]) + pCG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pCT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) + + pfromC_lchild = pCA+pCC+pCG+pCT + pfromC_rchild = pCA2+pCC2+pCG2+pCT2 + node.partial[i*4 + 1] = pfromC_lchild*pfromC_rchild + + ####################################################### +# + pGA = pdiff*(node.lchild.partial[i*4 + 0]) + pGC = pdiff*(node.lchild.partial[i*4 + 1]) + pGG = psame*(node.lchild.partial[i*4 + 2]) + pGT = pdiff*(node.lchild.partial[i*4 + 3]) +# + pGA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pGC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pGG2 = psame2*(node.lchild.rsib.partial[i*4 + 2]) + pGT2 = pdiff2*(node.lchild.rsib.partial[i*4 + 3]) +# + pfromG_lchild = pGA+pGC+pGG+pGT + pfromG_rchild = pGA2+pGC2+pGG2+pGT2 + node.partial[i*4 + 2] = pfromG_lchild*pfromG_rchild + + ####################################################### + + pTA = pdiff*(node.lchild.partial[i*4 + 0]) + pTC = pdiff*(node.lchild.partial[i*4 + 1]) + pTG = pdiff*(node.lchild.partial[i*4 + 2]) + pTT = psame*(node.lchild.partial[i*4 + 3]) +# + pTA2 = pdiff2*(node.lchild.rsib.partial[i*4 + 0]) + pTC2 = pdiff2*(node.lchild.rsib.partial[i*4 + 1]) + pTG2 = pdiff2*(node.lchild.rsib.partial[i*4 + 2]) + pTT2 = psame2*(node.lchild.rsib.partial[i*4 + 3]) +# + pfromT_lchild = pTA+pTC+pTG+pTT + pfromT_rchild = pTA2+pTC2+pTG2+pTT2 + node.partial[i*4 + 3] = pfromT_lchild*pfromT_rchild + + ######################################################### + + site_log_like = (log((sum(node.partial[i*4:i*4+4]))*0.25))*num_pattern + like_list.append(site_log_like) + + log_Like = sum(like_list) + return log_Like + +def treenewick(): + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + path = os.path.join(script_dir, 'tree.tre') + with open(path, 'r') as content: + newick = content.read() + return newick + +def prepareTree(postorder, patterns): + likelihood_lists = [] + for nd in postorder: + likelihood_lists.append(allocatePartial(nd, patterns)) + print 'log-likelihood of the topology =', likelihood_lists[-1] + +def joinRandomPair(node_list, next_node_number, is_deep_coalescence): + # pick first of two lineages to join and delete from node_list + i = random.randint(1, len(node_list)) + ndi = node_list[i-1] + del node_list[i-1] + + # pick second of two lineages to join and delete from node_list + j = random.randint(1, len(node_list)) + ndj = node_list[j-1] + del node_list[j-1] + + # join selected nodes and add ancestor to node_list + ancnd = node(next_node_number) + ancnd.deep = is_deep_coalescence + ancnd.lchild = ndi + ancnd.descendants = set() + ancnd.descendants |= ndi.descendants + ancnd.descendants |= ndj.descendants + ndi.rsib = ndj + ndi.par = ancnd + ndj.par = ancnd + node_list.append(ancnd) + + return node_list + + +def makeNewick(nd, brlen_scaler = 1.0, start = True): # + global _newick + global _TL + + if start: + _newick = '' + _TL = 0.0 + + if nd.lchild: + _newick += '(' + makeNewick(nd.lchild, brlen_scaler, False) + + else: + blen = nd.edgelen*brlen_scaler + _TL += blen + _newick += '%d:%.5f' % (nd.number, blen) + + if nd.rsib: + _newick += ',' + makeNewick(nd.rsib, brlen_scaler, False) + elif nd.par is not None: + blen = nd.par.edgelen*brlen_scaler + _TL += blen + _newick += '):%.3f' % blen + + return _newick, _TL + +def calcActualHeight(root): + h = 0.0 + nd = root + while nd.lchild: + nd = nd.lchild + h += nd.edgelen + return h + + +def readnewick(tree): + total_length = len(tree) + internal_node_number = -1 + + root = node(internal_node_number) + nd = root + i = 0 + pre = [root] + while i < total_length: + m = tree[i] + + if m =='(': + internal_node_number -= 1 + + child = node(internal_node_number) + pre.append(child) + nd.lchild=child + + child.par=nd + nd=child + elif m == ',': + internal_node_number -= 1 + rsib = node(internal_node_number) + pre.append(rsib) + nd.rsib = rsib + rsib.par=nd.par + nd = rsib + elif m == ')': + nd = nd.par + + elif m == ':': + edge_len_str = '' + i+=1 + m = tree[i] + assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] + while m in ['0','1','2','3','4','5','6','7','8', '9','.']: + edge_len_str += m + i+=1 + m = tree[i] + i -=1 + nd.edgelen = float(edge_len_str) + + + else: + internal_node_number += 1 + + if True: + assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m + mm = '' + while m in ['0','1','2','3','4','5','6','7','8', '9' ]: + + mm += m + + i += 1 + m = tree[i] + nd.number = int(mm) + i -= 1 + + i += 1 + + post = pre[:] + post.reverse() + return post + +def Makenewick(pre): + newickstring = '' + for i,nd in enumerate(pre): + if nd.lchild: + newickstring += '(' + + elif nd.rsib: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + newickstring += ',' + + else: + newickstring += '%d' %(nd.number) + newickstring += ':%.1f' % nd.edgelen + tmpnd = nd + while (tmpnd.par is not None) and (tmpnd.rsib is None): + newickstring += ')' + newickstring += ':%.1f' % tmpnd.par.edgelen + tmpnd = tmpnd.par + + if tmpnd.par is not None: + newickstring += ',' + return newickstring + +###################yule tree################################################### +# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree +# - num_species is the number of leaves (tips) in the tree +def calcPhi(num_species): + phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) + return phi + +# yuleTree creates a species tree in which edge lengths are measured in +# expected number of substitutions. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def yuleTree(num_species, mu_over_s): + # create num_species nodes numbered 1, 2, ..., num_species + nodes = [node(i+1) for i in range(num_species)] + + next_node_number = num_species + 1 + while len(nodes) > 1: + # choose a speciation time in generations + K = float(len(nodes)) + mean_epoch_length = mu_over_s/K + t = random.gammavariate(1.0, mean_epoch_length) + + # update each node's edgelen + for n in nodes: + n.edgelen += t # same as: n.edgelen = n.edgelen + t + + nodes = joinRandomPair(nodes, next_node_number, False) + next_node_number += 1 + + return nodes[0] + +# calcExpectedHeight returns the expected height of the species tree in terms of +# expected number of substitutions from the root to one tip. +# - num_species is the number of leaves +# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio +def calcExpectedHeight(num_species, mu_over_s): + return mu_over_s*calcPhi(num_species) + + +if __name__ == '__main__': + random_seed = 24553 # 7632557, 12345 + number_of_species = 5 + mutation_speciation_rate_ratio = 0.4 # 0.689655172 # yields tree height 1 for 6 species + random.seed(random_seed) + species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) +# print '#########' +# print species_tree_root + newick = makeNewick(species_tree_root) +# print 'Random number seed: %d' % random_seed +# print 'Simulating one tree:' +# print ' number of species = %d' % number_of_species +# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio +# print ' actual tree length =',newick[1] + expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) +# print ' expected height =',expected_height + actual_height = calcActualHeight(species_tree_root) +# print ' actual height =',actual_height +# print ' newick: ',newick[0] + + +# yuletree = '(((1:0.03915,5:0.03915):0.387,(4:0.42253,2:0.42253):0.004):0.118,3:0.54433)' + + postorder = readnewick(treenewick()) + result = prepareTree(postorder, readSeq.patterns()) \ No newline at end of file diff --git a/brnlenMCMC.py b/brnlenMCMC.py deleted file mode 100644 index 87c6160..0000000 --- a/brnlenMCMC.py +++ /dev/null @@ -1,494 +0,0 @@ -########################################################################################## -# This script reads a nexus DNA matrix (through module readseq.py) and a newick tree -# topology, and computes log-likelihood of the topology under Jukes Cantor+GAMMA model, -# and performs MCMC on branch length parameter -########################################################################################## - - -import readSeq -import random -import re, os, itertools, sys, glob -from itertools import chain -from scipy.stats import gamma -from math import exp, log - - -########################################################################################## -tree_file_name = 'tree.tre' -sequence_file = 'example3.nex' -alpha = 0.5 #gamma shape parameter for rate categories -n_gen = 50000 -save_every = 50 -mean_expo = 10. #mean_expo = mean of exponential distribution for branch length prior -########################################################################################## - - - - -class node(object): - def __init__(self, ndnum): # initialization function - self.rsib = None # right sibling - self.lchild = None # left child - self.par = None # parent node - self.number = ndnum # node number (internals negative, tips 0 or positive) - self.edgelen = 0.0 # branch length - self.descendants = set([ndnum]) # set containing descendant leaf set - self.partial = None # will have length 4*npatterns - - - def __str__(self): - # __str__ is a built-in function that is used by print to show an object - descendants_as_string = ','.join(['%d' % d for d in self.descendants]) - - lchildstr = 'None' - if self.lchild is not None: - lchildstr = '%d' % self.lchild.number - - rsibstr = 'None' - if self.rsib is not None: - rsibstr = '%d' % self.rsib.number - - parstr = 'None' - if self.par is not None: - parstr = '%d' % self.par.number - - return 'node: number=%d edgelen=%g lchild=%s rsib=%s parent=%s descendants=[%s]' % (self.number, self.edgelen, lchildstr, rsibstr, parstr, descendants_as_string) - - - -def allocatePartial(node, patterns, rates): - if node.number > 0: - npatterns = len(patterns) - - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) -# print len(node.partial) - for i,pattern in enumerate(patterns.keys()): - base = pattern[node.number-1] - for l in range(4): - if base == 'A': - - - node.partial[i*16+l*4 + 0] = 1.0 - elif base == 'C': - node.partial[i*16+l*4 + 1] = 1.0 - elif base == 'G': - node.partial[i*16+l*4 + 2] = 1.0 - elif base == 'T': - node.partial[i*16+l*4 + 3] = 1.0 - else: - assert(False), 'oops, something went horribly wrong!' - - else: - - npatterns = len(patterns) - if node.partial is None: - node.partial = [0.0]*(4*4*npatterns) - - like_list = [] - for i,pattern in enumerate(patterns.keys()): - m_list = [] - num_pattern = patterns[pattern] - - for l,m in enumerate(rates): - - psame = (0.25+0.75*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - pdiff = (0.25-0.25*exp(-4.0*m*(node.lchild.edgelen)/3.0)) - - psame2 = (0.25+0.75*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - pdiff2 = (0.25-0.25*exp(-4.0*m*(node.lchild.rsib.edgelen)/3.0)) - - num_pattern = patterns[pattern] - pAA = psame*(node.lchild.partial[i*16+l*4 + 0]) - pAC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pAG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pAT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pAA2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pAC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pAG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pAT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromA_lchild = pAA+pAC+pAG+pAT - pfromA_rchild = pAA2+pAC2+pAG2+pAT2 - node.partial[i*16+l*4 + 0] = pfromA_lchild*pfromA_rchild - - - ###################################################### - - pCA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pCC = psame*(node.lchild.partial[i*16+l*4 + 1]) - pCG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pCT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pCA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pCC2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pCG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pCT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromC_lchild = pCA+pCC+pCG+pCT - pfromC_rchild = pCA2+pCC2+pCG2+pCT2 - node.partial[i*16+l*4 + 1] = pfromC_lchild*pfromC_rchild - - ####################################################### -# - pGA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pGC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pGG = psame*(node.lchild.partial[i*16+l*4 + 2]) - pGT = pdiff*(node.lchild.partial[i*16+l*4 + 3]) - - pGA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pGC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pGG2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pGT2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromG_lchild = pGA+pGC+pGG+pGT - pfromG_rchild = pGA2+pGC2+pGG2+pGT2 - node.partial[i*16+l*4 + 2] = pfromG_lchild*pfromG_rchild - - ####################################################### - - pTA = pdiff*(node.lchild.partial[i*16+l*4 + 0]) - pTC = pdiff*(node.lchild.partial[i*16+l*4 + 1]) - pTG = pdiff*(node.lchild.partial[i*16+l*4 + 2]) - pTT = psame*(node.lchild.partial[i*16+l*4 + 3]) - - pTA2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 0]) - pTC2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 1]) - pTG2 = pdiff2*(node.lchild.rsib.partial[i*16+l*4 + 2]) - pTT2 = psame2*(node.lchild.rsib.partial[i*16+l*4 + 3]) - - pfromT_lchild = pTA+pTC+pTG+pTT - pfromT_rchild = pTA2+pTC2+pTG2+pTT2 - node.partial[i*16+l*4 + 3] = pfromT_lchild*pfromT_rchild - - site_like = (sum(node.partial[i*16:i*16+16]))*0.25*0.25 - site_log_like = (log(site_like))*num_pattern - like_list.append(site_log_like) - log_like = sum(like_list) - return log_like - - - -def mcmcbrn(postorder, patterns, rates): - nodes = readnewick(treenewick()) - mcmc = 0 - output = os.path.join('brnlenMCMC_results.txt') - newf = open(output, 'w') - newf.write('%s\t'%('n_gen')) - newf.write( '%s\t%s\t'%('LnL','LnPr')) - for nl in postorder: - newf.write( 'node%s\t'%(nl.number)) - newf.write('\n') - start_log_prior = 0.0 - for nd in nodes: - start_log_prior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - start_log_like = prepareTree(nodes, patterns, rates) - - - newf.write('%s\t'%(mcmc)) - print 'mcmc gen=', mcmc - print start_log_like, start_log_prior, - - newf.write( '%.6f\t%.6f\t'%(start_log_like,start_log_prior)) - for nl in postorder: - newf.write( '%.6f\t'%(nl.edgelen)) - print nl.edgelen, - print - print '**************************' - - newf.write('\n') -# print - for r in range(n_gen): - for i in range(len(postorder)): - preedgelen = nodes[i].edgelen - currentlike = prepareTree(nodes, patterns, rates) -# currentlike = 0.0 - currentprior = 0.0 - for nd in nodes: - currentprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - current_ln_posterior = currentlike + currentprior - - u = random.random() - m = exp(0.2*(u-0.5)) - nodes[i].edgelen = preedgelen*m - proposedprior = 0.0 - for nd in nodes: - proposedprior += (-nd.edgelen/mean_expo)-(log(mean_expo)) - - proposedlike = prepareTree(nodes, patterns, rates) - proposed_ln_posterior = proposedlike + proposedprior - hastings_ratio = log(m) - logR = proposed_ln_posterior - current_ln_posterior + hastings_ratio - u2 = random.random() - if log(u2) < logR: - nodes[i].edgelen = nodes[i].edgelen - log_prior = proposedprior - log_likelihood = proposedlike -# print 'log(u2) < logR so new proposal accepted..' - else: - nodes[i].edgelen = preedgelen - log_prior = currentprior - log_likelihood = currentlike - -# print 'log(u2) > logR so failed to accept new proposal..' - - if (r+1) % save_every == 0: - newf.write('%s\t'%(mcmc+1)) - print 'mcmc gen=', mcmc+1 - print log_likelihood,log_prior, - newf.write( '%.6f\t%.6f\t'%(log_likelihood,log_prior)) - for j,k in enumerate(nodes): - newf.write( '%.6f\t'%(k.edgelen)) - print k.edgelen, - newf.write('\n') - print - print '**************************' - - newf.flush() - - mcmc+=1 - - -def treenewick(): - script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) - path = os.path.join(script_dir, tree_file_name) - with open(path, 'r') as content: - newick = content.read() - return newick -# - -def gammaRates(alpha): - bounds = [0.0, 0.25, 0.50, 0.75, 1.] - rates = [] - for i in range(4): -# print i - lower = gamma.ppf(bounds[i], alpha, 0, 1./alpha) - upper = gamma.ppf(bounds[i+1], alpha, 0, 1./alpha) - mean_rate = ((gamma.cdf(upper, alpha+1., 0, 1./alpha) - gamma.cdf(lower, alpha+1., 0, 1./alpha)))*4. - rates.append(mean_rate) - return rates - -def prepareTree(postorder, patterns, rates): - likelihood_lists = [] - for nd in postorder: - likelihood_lists.append(allocatePartial(nd, patterns, rates)) -# print 'log-likelihood of the topology =', likelihood_lists[-1] - return likelihood_lists[-1] - -def joinRandomPair(node_list, next_node_number, is_deep_coalescence): - # pick first of two lineages to join and delete from node_list - i = random.randint(1, len(node_list)) - ndi = node_list[i-1] - del node_list[i-1] - - # pick second of two lineages to join and delete from node_list - j = random.randint(1, len(node_list)) - ndj = node_list[j-1] - del node_list[j-1] - - # join selected nodes and add ancestor to node_list - ancnd = node(next_node_number) - ancnd.deep = is_deep_coalescence - ancnd.lchild = ndi - ancnd.descendants = set() - ancnd.descendants |= ndi.descendants - ancnd.descendants |= ndj.descendants - ndi.rsib = ndj - ndi.par = ancnd - ndj.par = ancnd - node_list.append(ancnd) - - return node_list - - -def makeNewick(nd, brlen_scaler = 1.0, start = True): # - global _newick - global _TL - - if start: - _newick = '' - _TL = 0.0 - - if nd.lchild: - _newick += '(' - makeNewick(nd.lchild, brlen_scaler, False) - - else: - blen = nd.edgelen*brlen_scaler - _TL += blen - _newick += '%d:%.5f' % (nd.number, blen) - - if nd.rsib: - _newick += ',' - makeNewick(nd.rsib, brlen_scaler, False) - elif nd.par is not None: - blen = nd.par.edgelen*brlen_scaler - _TL += blen - _newick += '):%.3f' % blen - - return _newick, _TL - -def calcActualHeight(root): - h = 0.0 - nd = root - while nd.lchild: - nd = nd.lchild - h += nd.edgelen - return h - - -def readnewick(tree): - total_length = len(tree) - internal_node_number = -1 - - root = node(internal_node_number) - nd = root - i = 0 - pre = [root] - while i < total_length: - m = tree[i] - - if m =='(': - internal_node_number -= 1 - - child = node(internal_node_number) - pre.append(child) - nd.lchild=child - - child.par=nd - nd=child - elif m == ',': - internal_node_number -= 1 - rsib = node(internal_node_number) - pre.append(rsib) - nd.rsib = rsib - rsib.par=nd.par - nd = rsib - elif m == ')': - nd = nd.par - - elif m == ':': - edge_len_str = '' - i+=1 - m = tree[i] - assert m in ['0','1','2','3','4','5','6','7','8', '9','.'] - while m in ['0','1','2','3','4','5','6','7','8', '9','.']: - edge_len_str += m - i+=1 - m = tree[i] - i -=1 - nd.edgelen = float(edge_len_str) - - - else: - internal_node_number += 1 - - if True: - assert m in ['0','1','2','3','4','5','6','7','8', '9'], 'Error : expecting m to be a digit when in fact it was "%s"' % m - mm = '' - while m in ['0','1','2','3','4','5','6','7','8', '9' ]: - - mm += m - - i += 1 - m = tree[i] - nd.number = int(mm) - i -= 1 - - i += 1 - - post = pre[:] - post.reverse() - return post - -def Makenewick(pre): - newickstring = '' - for i,nd in enumerate(pre): - if nd.lchild: - newickstring += '(' - - elif nd.rsib: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - newickstring += ',' - - else: - newickstring += '%d' %(nd.number) - newickstring += ':%.1f' % nd.edgelen - tmpnd = nd - while (tmpnd.par is not None) and (tmpnd.rsib is None): - newickstring += ')' - newickstring += ':%.1f' % tmpnd.par.edgelen - tmpnd = tmpnd.par - - if tmpnd.par is not None: - newickstring += ',' - return newickstring - -###################yule tree################################################### -# calcPhi computes sum_{K=2}^S 1/K, where S is the number of leaves in the tree -# - num_species is the number of leaves (tips) in the tree -def calcPhi(num_species): - phi = sum([1.0/(K+2.0) for K in range(num_species-1)]) - return phi - -# yuleTree creates a species tree in which edge lengths are measured in -# expected number of substitutions. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def yuleTree(num_species, mu_over_s): - # create num_species nodes numbered 1, 2, ..., num_species - nodes = [node(i+1) for i in range(num_species)] - - next_node_number = num_species + 1 - while len(nodes) > 1: - # choose a speciation time in generations - K = float(len(nodes)) - mean_epoch_length = mu_over_s/K - t = random.gammavariate(1.0, mean_epoch_length) - - # update each node's edgelen - for n in nodes: - n.edgelen += t # same as: n.edgelen = n.edgelen + t - - nodes = joinRandomPair(nodes, next_node_number, False) - next_node_number += 1 - - return nodes[0] - -# calcExpectedHeight returns the expected height of the species tree in terms of -# expected number of substitutions from the root to one tip. -# - num_species is the number of leaves -# - mu_over_s is the mutations-per-generation/speciations-per-generation rate ratio -def calcExpectedHeight(num_species, mu_over_s): - return mu_over_s*calcPhi(num_species) - - -if __name__ == '__main__': - random_seed = 348889 # 7632557, 12345 - number_of_species = 5 - mutation_speciation_rate_ratio = 0.689655172 # 0.689655172 # yields tree height 1 for 6 species - random.seed(random_seed) - species_tree_root = yuleTree(number_of_species, mutation_speciation_rate_ratio) -# print '#########' -# print species_tree_root - newick = makeNewick(species_tree_root) -# print 'Random number seed: %d' % random_seed -# print 'Simulating one tree:' -# print ' number of species = %d' % number_of_species -# print ' mutation-speciation rate ratio = %g' % mutation_speciation_rate_ratio -# print ' actual tree length =',newick[1] - expected_height = calcExpectedHeight(number_of_species, mutation_speciation_rate_ratio) -# print ' expected height =',expected_height - actual_height = calcActualHeight(species_tree_root) -# print ' actual height =',actual_height - print 'true tree: ',newick[0] - print '**************************' - -# yuletree = '(((1:0.54019,(5:0.40299,10:0.40299):0.1372):0.72686,(6:0.10576,4:0.10576):1.16129):0.42537,(2:0.58122,(9:0.21295,(7:0.16691,(8:0.14622,3:0.14622):0.02069):0.04604):0.36827):1.1112)' - rates_list = gammaRates(alpha) - postorder = readnewick(treenewick()) - result = prepareTree(postorder, readSeq.patterns(sequence_file), rates_list) -# try1 = readSeq.patterns() - result2 = mcmcbrn(postorder, readSeq.patterns(sequence_file), rates_list) diff --git a/tree.tre b/tree.tre deleted file mode 100644 index 2d84738..0000000 --- a/tree.tre +++ /dev/null @@ -1 +0,0 @@ -(5:1.8601,((3:0.47109,2:0.47109):0.492,(4:0.05805,1:0.05805):0.906):0.896) \ No newline at end of file