From ae973fe87db5f51c4624d33c823cbfd2af78d332 Mon Sep 17 00:00:00 2001 From: Luis Roberto Mercado Diaz Date: Mon, 6 May 2024 10:00:29 -0400 Subject: [PATCH] CAPSULE LOADING The following files are the code core, that is going to be defined as relevant on this repository --- .gitignore | 7 + REPRODUCING.md | 36 + code/Extrinsic_application_CVD_prediction.py | 128 + code/LICENSE | 21 + code/README.md | 266 + code/gene_extraction.py | 134 + code/lib/Bio/Affy/CelFile.py | 502 + code/lib/Bio/Affy/__init__.py | 6 + .../Affy/__pycache__/CelFile.cpython-37.pyc | Bin 0 -> 10908 bytes .../Affy/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 226 bytes code/lib/Bio/Align/AlignInfo.py | 594 + .../Bio/Align/Applications/_ClustalOmega.py | 269 + code/lib/Bio/Align/Applications/_Clustalw.py | 486 + code/lib/Bio/Align/Applications/_Dialign.py | 243 + code/lib/Bio/Align/Applications/_MSAProbs.py | 89 + code/lib/Bio/Align/Applications/_Mafft.py | 435 + code/lib/Bio/Align/Applications/_Muscle.py | 685 + code/lib/Bio/Align/Applications/_Prank.py | 236 + code/lib/Bio/Align/Applications/_Probcons.py | 137 + code/lib/Bio/Align/Applications/_TCoffee.py | 125 + code/lib/Bio/Align/Applications/__init__.py | 34 + .../__pycache__/_ClustalOmega.cpython-37.pyc | Bin 0 -> 6898 bytes .../__pycache__/_Clustalw.cpython-37.pyc | Bin 0 -> 14200 bytes .../__pycache__/_Dialign.cpython-37.pyc | Bin 0 -> 7383 bytes .../__pycache__/_MSAProbs.cpython-37.pyc | Bin 0 -> 3172 bytes .../__pycache__/_Mafft.cpython-37.pyc | Bin 0 -> 10327 bytes .../__pycache__/_Muscle.cpython-37.pyc | Bin 0 -> 11786 bytes .../__pycache__/_Prank.cpython-37.pyc | Bin 0 -> 7138 bytes .../__pycache__/_Probcons.cpython-37.pyc | Bin 0 -> 4271 bytes .../__pycache__/_TCoffee.cpython-37.pyc | Bin 0 -> 3789 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 915 bytes code/lib/Bio/Align/__init__.py | 2326 ++ .../__pycache__/AlignInfo.cpython-37.pyc | Bin 0 -> 16101 bytes .../Align/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 70400 
bytes code/lib/Bio/Align/_aligners.c | 6988 ++++++ .../Bio/Align/_aligners.cp37-win_amd64.pyd | Bin 0 -> 146432 bytes .../Align/substitution_matrices/__init__.py | 514 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 13211 bytes .../Align/substitution_matrices/data/BENNER22 | 27 + .../Align/substitution_matrices/data/BENNER6 | 27 + .../Align/substitution_matrices/data/BENNER74 | 27 + .../Align/substitution_matrices/data/BLOSUM45 | 31 + .../Align/substitution_matrices/data/BLOSUM50 | 31 + .../Align/substitution_matrices/data/BLOSUM62 | 31 + .../Align/substitution_matrices/data/BLOSUM80 | 31 + .../Align/substitution_matrices/data/BLOSUM90 | 31 + .../Align/substitution_matrices/data/DAYHOFF | 27 + .../Bio/Align/substitution_matrices/data/FENG | 26 + .../Align/substitution_matrices/data/GENETIC | 27 + .../substitution_matrices/data/GONNET1992 | 26 + .../Align/substitution_matrices/data/HOXD70 | 9 + .../Align/substitution_matrices/data/JOHNSON | 27 + .../Align/substitution_matrices/data/JONES | 26 + .../Align/substitution_matrices/data/LEVIN | 27 + .../substitution_matrices/data/MCLACHLAN | 27 + .../Align/substitution_matrices/data/MDM78 | 27 + .../Align/substitution_matrices/data/NUC.4.4 | 25 + .../Align/substitution_matrices/data/PAM250 | 34 + .../Align/substitution_matrices/data/PAM30 | 34 + .../Align/substitution_matrices/data/PAM70 | 34 + .../Bio/Align/substitution_matrices/data/RAO | 27 + .../Align/substitution_matrices/data/RISLER | 27 + .../substitution_matrices/data/SCHNEIDER | 70 + .../Bio/Align/substitution_matrices/data/STR | 26 + .../Align/substitution_matrices/data/TRANS | 12 + code/lib/Bio/AlignIO/ClustalIO.py | 305 + code/lib/Bio/AlignIO/EmbossIO.py | 219 + code/lib/Bio/AlignIO/FastaIO.py | 344 + code/lib/Bio/AlignIO/Interfaces.py | 160 + code/lib/Bio/AlignIO/MafIO.py | 833 + code/lib/Bio/AlignIO/MauveIO.py | 349 + code/lib/Bio/AlignIO/MsfIO.py | 331 + code/lib/Bio/AlignIO/NexusIO.py | 166 + code/lib/Bio/AlignIO/PhylipIO.py | 454 + 
code/lib/Bio/AlignIO/StockholmIO.py | 630 + code/lib/Bio/AlignIO/__init__.py | 480 + .../__pycache__/ClustalIO.cpython-37.pyc | Bin 0 -> 5394 bytes .../__pycache__/EmbossIO.cpython-37.pyc | Bin 0 -> 3863 bytes .../__pycache__/FastaIO.cpython-37.pyc | Bin 0 -> 7807 bytes .../__pycache__/Interfaces.cpython-37.pyc | Bin 0 -> 5379 bytes .../AlignIO/__pycache__/MafIO.cpython-37.pyc | Bin 0 -> 17323 bytes .../__pycache__/MauveIO.cpython-37.pyc | Bin 0 -> 8463 bytes .../AlignIO/__pycache__/MsfIO.cpython-37.pyc | Bin 0 -> 5805 bytes .../__pycache__/NexusIO.cpython-37.pyc | Bin 0 -> 5113 bytes .../__pycache__/PhylipIO.cpython-37.pyc | Bin 0 -> 11706 bytes .../__pycache__/StockholmIO.cpython-37.pyc | Bin 0 -> 16479 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 15520 bytes code/lib/Bio/Alphabet/__init__.py | 22 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1028 bytes code/lib/Bio/Application/__init__.py | 838 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 26089 bytes code/lib/Bio/Blast/Applications.py | 1602 ++ code/lib/Bio/Blast/NCBIWWW.py | 348 + code/lib/Bio/Blast/NCBIXML.py | 864 + code/lib/Bio/Blast/ParseBlastTable.py | 126 + code/lib/Bio/Blast/Record.py | 460 + code/lib/Bio/Blast/__init__.py | 7 + .../__pycache__/Applications.cpython-37.pyc | Bin 0 -> 47345 bytes .../Blast/__pycache__/NCBIWWW.cpython-37.pyc | Bin 0 -> 7872 bytes .../Blast/__pycache__/NCBIXML.cpython-37.pyc | Bin 0 -> 26720 bytes .../ParseBlastTable.cpython-37.pyc | Bin 0 -> 4207 bytes .../Blast/__pycache__/Record.cpython-37.pyc | Bin 0 -> 16339 bytes .../Blast/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 223 bytes code/lib/Bio/CAPS/__init__.py | 135 + .../CAPS/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 3785 bytes code/lib/Bio/Cluster/__init__.py | 1293 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 43422 bytes .../Bio/Cluster/_cluster.cp37-win_amd64.pyd | Bin 0 -> 129536 bytes code/lib/Bio/Cluster/cluster.c | 5061 ++++ code/lib/Bio/Cluster/cluster.h | 90 + 
code/lib/Bio/Cluster/clustermodule.c | 2457 ++ code/lib/Bio/Compass/__init__.py | 223 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5847 bytes code/lib/Bio/Data/CodonTable.py | 1313 + code/lib/Bio/Data/IUPACData.py | 423 + code/lib/Bio/Data/SCOPData.py | 277 + code/lib/Bio/Data/__init__.py | 8 + .../__pycache__/CodonTable.cpython-37.pyc | Bin 0 -> 23111 bytes .../Data/__pycache__/IUPACData.cpython-37.pyc | Bin 0 -> 6323 bytes .../Data/__pycache__/SCOPData.cpython-37.pyc | Bin 0 -> 10171 bytes .../Data/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 228 bytes code/lib/Bio/Emboss/Applications.py | 1221 + code/lib/Bio/Emboss/Primer3.py | 183 + code/lib/Bio/Emboss/PrimerSearch.py | 80 + code/lib/Bio/Emboss/__init__.py | 8 + .../__pycache__/Applications.cpython-37.pyc | Bin 0 -> 38125 bytes .../Emboss/__pycache__/Primer3.cpython-37.pyc | Bin 0 -> 4411 bytes .../__pycache__/PrimerSearch.cpython-37.pyc | Bin 0 -> 2689 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 233 bytes code/lib/Bio/Entrez/DTDs/Docsum_3_0.dtd | 17 + code/lib/Bio/Entrez/DTDs/Docsum_3_0.mod.dtd | 1054 + code/lib/Bio/Entrez/DTDs/Docsum_3_1.dtd | 17 + code/lib/Bio/Entrez/DTDs/Docsum_3_1.mod.dtd | 1055 + code/lib/Bio/Entrez/DTDs/Docsum_3_2.dtd | 17 + code/lib/Bio/Entrez/DTDs/Docsum_3_2.mod.dtd | 1418 ++ code/lib/Bio/Entrez/DTDs/Docsum_3_3.dtd | 17 + code/lib/Bio/Entrez/DTDs/Docsum_3_3.mod.dtd | 1585 ++ code/lib/Bio/Entrez/DTDs/Docsum_3_4.dtd | 17 + code/lib/Bio/Entrez/DTDs/Docsum_3_4.mod.dtd | 1594 ++ code/lib/Bio/Entrez/DTDs/EMBL_General.dtd | 20 + code/lib/Bio/Entrez/DTDs/EMBL_General.mod.dtd | 133 + code/lib/Bio/Entrez/DTDs/GenBank_General.dtd | 20 + .../Bio/Entrez/DTDs/GenBank_General.mod.dtd | 65 + code/lib/Bio/Entrez/DTDs/HomoloGene.dtd | 89 + code/lib/Bio/Entrez/DTDs/HomoloGene.mod.dtd | 293 + code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.dtd | 17 + code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.mod.dtd | 491 + code/lib/Bio/Entrez/DTDs/MMDB.dtd | 98 + code/lib/Bio/Entrez/DTDs/MMDB.mod.dtd | 259 + 
.../Bio/Entrez/DTDs/MMDB_Chemical_graph.dtd | 98 + .../Entrez/DTDs/MMDB_Chemical_graph.mod.dtd | 561 + code/lib/Bio/Entrez/DTDs/MMDB_Features.dtd | 98 + .../lib/Bio/Entrez/DTDs/MMDB_Features.mod.dtd | 932 + .../Bio/Entrez/DTDs/MMDB_Structural_model.dtd | 98 + .../Entrez/DTDs/MMDB_Structural_model.mod.dtd | 676 + code/lib/Bio/Entrez/DTDs/NCBI_Access.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_Access.mod.dtd | 49 + code/lib/Bio/Entrez/DTDs/NCBI_Biblio.dtd | 20 + code/lib/Bio/Entrez/DTDs/NCBI_Biblio.mod.dtd | 690 + code/lib/Bio/Entrez/DTDs/NCBI_BioSource.dtd | 23 + .../Bio/Entrez/DTDs/NCBI_BioSource.mod.dtd | 200 + code/lib/Bio/Entrez/DTDs/NCBI_BioTree.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_BioTree.mod.dtd | 109 + code/lib/Bio/Entrez/DTDs/NCBI_Blast4.dtd | 95 + code/lib/Bio/Entrez/DTDs/NCBI_Blast4.mod.dtd | 1498 ++ code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.dtd | 89 + code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.mod.dtd | 138 + code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.dtd | 17 + .../Bio/Entrez/DTDs/NCBI_BlastOutput.mod.dtd | 273 + code/lib/Bio/Entrez/DTDs/NCBI_Cdd.dtd | 110 + code/lib/Bio/Entrez/DTDs/NCBI_Cdd.mod.dtd | 1088 + code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.dtd | 101 + code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.mod.dtd | 534 + code/lib/Bio/Entrez/DTDs/NCBI_Entity.mod.dtd | 13 + code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.mod.dtd | 747 + code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.dtd | 89 + .../Bio/Entrez/DTDs/NCBI_Entrezgene.mod.dtd | 394 + code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.mod.dtd | 97 + code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.mod.dtd | 407 + code/lib/Bio/Entrez/DTDs/NCBI_Gene.dtd | 20 + code/lib/Bio/Entrez/DTDs/NCBI_Gene.mod.dtd | 97 + code/lib/Bio/Entrez/DTDs/NCBI_General.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_General.mod.dtd | 333 + code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.dtd | 92 + .../Bio/Entrez/DTDs/NCBI_ID1Access.mod.dtd | 218 + 
code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.dtd | 95 + .../Bio/Entrez/DTDs/NCBI_ID2Access.mod.dtd | 759 + code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.dtd | 35 + .../Bio/Entrez/DTDs/NCBI_MedArchive.mod.dtd | 271 + code/lib/Bio/Entrez/DTDs/NCBI_Medlars.dtd | 23 + code/lib/Bio/Entrez/DTDs/NCBI_Medlars.mod.dtd | 58 + code/lib/Bio/Entrez/DTDs/NCBI_Medline.dtd | 23 + code/lib/Bio/Entrez/DTDs/NCBI_Medline.mod.dtd | 245 + code/lib/Bio/Entrez/DTDs/NCBI_Mim.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_Mim.mod.dtd | 354 + code/lib/Bio/Entrez/DTDs/NCBI_Mime.dtd | 113 + code/lib/Bio/Entrez/DTDs/NCBI_Mime.mod.dtd | 251 + code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.mod.dtd | 133 + code/lib/Bio/Entrez/DTDs/NCBI_Organism.dtd | 20 + .../lib/Bio/Entrez/DTDs/NCBI_Organism.mod.dtd | 226 + code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.dtd | 38 + code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.mod.dtd | 1006 + code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.dtd | 29 + .../Bio/Entrez/DTDs/NCBI_PCSubstance.mod.dtd | 1628 ++ code/lib/Bio/Entrez/DTDs/NCBI_Project.dtd | 95 + code/lib/Bio/Entrez/DTDs/NCBI_Project.mod.dtd | 158 + code/lib/Bio/Entrez/DTDs/NCBI_Protein.dtd | 20 + code/lib/Bio/Entrez/DTDs/NCBI_Protein.mod.dtd | 75 + code/lib/Bio/Entrez/DTDs/NCBI_Pub.dtd | 26 + code/lib/Bio/Entrez/DTDs/NCBI_Pub.mod.dtd | 120 + code/lib/Bio/Entrez/DTDs/NCBI_PubMed.dtd | 26 + code/lib/Bio/Entrez/DTDs/NCBI_PubMed.mod.dtd | 64 + code/lib/Bio/Entrez/DTDs/NCBI_RNA.dtd | 86 + code/lib/Bio/Entrez/DTDs/NCBI_RNA.mod.dtd | 144 + code/lib/Bio/Entrez/DTDs/NCBI_Remap.dtd | 89 + code/lib/Bio/Entrez/DTDs/NCBI_Remap.mod.dtd | 158 + code/lib/Bio/Entrez/DTDs/NCBI_Rsite.dtd | 20 + code/lib/Bio/Entrez/DTDs/NCBI_Rsite.mod.dtd | 38 + code/lib/Bio/Entrez/DTDs/NCBI_ScoreMat.dtd | 92 + .../lib/Bio/Entrez/DTDs/NCBI_ScoreMat.mod.dtd | 579 + code/lib/Bio/Entrez/DTDs/NCBI_SeqCode.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_SeqCode.mod.dtd | 150 + code/lib/Bio/Entrez/DTDs/NCBI_SeqTable.dtd | 86 + 
.../lib/Bio/Entrez/DTDs/NCBI_SeqTable.mod.dtd | 390 + code/lib/Bio/Entrez/DTDs/NCBI_Seq_split.dtd | 92 + .../Bio/Entrez/DTDs/NCBI_Seq_split.mod.dtd | 559 + code/lib/Bio/Entrez/DTDs/NCBI_Seqalign.dtd | 86 + .../lib/Bio/Entrez/DTDs/NCBI_Seqalign.mod.dtd | 570 + code/lib/Bio/Entrez/DTDs/NCBI_Seqfeat.dtd | 86 + code/lib/Bio/Entrez/DTDs/NCBI_Seqfeat.mod.dtd | 772 + code/lib/Bio/Entrez/DTDs/NCBI_Seqloc.dtd | 86 + code/lib/Bio/Entrez/DTDs/NCBI_Seqloc.mod.dtd | 325 + code/lib/Bio/Entrez/DTDs/NCBI_Seqres.dtd | 86 + code/lib/Bio/Entrez/DTDs/NCBI_Seqres.mod.dtd | 134 + code/lib/Bio/Entrez/DTDs/NCBI_Seqset.dtd | 89 + code/lib/Bio/Entrez/DTDs/NCBI_Seqset.mod.dtd | 138 + code/lib/Bio/Entrez/DTDs/NCBI_Sequence.dtd | 86 + .../lib/Bio/Entrez/DTDs/NCBI_Sequence.mod.dtd | 1112 + code/lib/Bio/Entrez/DTDs/NCBI_Submit.dtd | 92 + code/lib/Bio/Entrez/DTDs/NCBI_Submit.mod.dtd | 156 + code/lib/Bio/Entrez/DTDs/NCBI_Systems.dtd | 86 + code/lib/Bio/Entrez/DTDs/NCBI_TSeq.dtd | 17 + code/lib/Bio/Entrez/DTDs/NCBI_TSeq.mod.dtd | 66 + code/lib/Bio/Entrez/DTDs/NCBI_TxInit.dtd | 29 + code/lib/Bio/Entrez/DTDs/NCBI_TxInit.mod.dtd | 184 + code/lib/Bio/Entrez/DTDs/NCBI_Variation.dtd | 86 + .../Bio/Entrez/DTDs/NCBI_Variation.mod.dtd | 944 + code/lib/Bio/Entrez/DTDs/NCBI_all.dtd | 202 + code/lib/Bio/Entrez/DTDs/NSE.dtd | 17 + code/lib/Bio/Entrez/DTDs/NSE.mod.dtd | 895 + code/lib/Bio/Entrez/DTDs/OMSSA.dtd | 89 + code/lib/Bio/Entrez/DTDs/OMSSA.mod.dtd | 1361 ++ code/lib/Bio/Entrez/DTDs/PDB_General.dtd | 20 + code/lib/Bio/Entrez/DTDs/PDB_General.mod.dtd | 70 + code/lib/Bio/Entrez/DTDs/PIR_General.dtd | 86 + code/lib/Bio/Entrez/DTDs/PIR_General.mod.dtd | 78 + code/lib/Bio/Entrez/DTDs/PRF_General.dtd | 17 + code/lib/Bio/Entrez/DTDs/PRF_General.mod.dtd | 56 + code/lib/Bio/Entrez/DTDs/SP_General.dtd | 86 + code/lib/Bio/Entrez/DTDs/SP_General.mod.dtd | 94 + code/lib/Bio/Entrez/DTDs/XHTMLtablesetup.ent | 309 + code/lib/Bio/Entrez/DTDs/archivearticle.dtd | 952 + .../Bio/Entrez/DTDs/archivecustom-classes.ent | 157 + 
.../Bio/Entrez/DTDs/archivecustom-mixes.ent | 306 + .../Bio/Entrez/DTDs/archivecustom-models.ent | 756 + .../Bio/Entrez/DTDs/archivecustom-modules.ent | 116 + code/lib/Bio/Entrez/DTDs/articlemeta.ent | 1811 ++ code/lib/Bio/Entrez/DTDs/backmatter.ent | 277 + code/lib/Bio/Entrez/DTDs/bookdoc_100301.dtd | 78 + code/lib/Bio/Entrez/DTDs/bookdoc_110101.dtd | 78 + code/lib/Bio/Entrez/DTDs/bookdoc_120101.dtd | 78 + code/lib/Bio/Entrez/DTDs/bookdoc_130101.dtd | 82 + code/lib/Bio/Entrez/DTDs/bookdoc_140101.dtd | 82 + code/lib/Bio/Entrez/DTDs/bookdoc_150101.dtd | 82 + code/lib/Bio/Entrez/DTDs/chars.ent | 359 + code/lib/Bio/Entrez/DTDs/common.ent | 2790 +++ code/lib/Bio/Entrez/DTDs/default-classes.ent | 704 + code/lib/Bio/Entrez/DTDs/default-mixes.ent | 357 + code/lib/Bio/Entrez/DTDs/display.ent | 1468 ++ code/lib/Bio/Entrez/DTDs/eInfo_020511.dtd | 60 + code/lib/Bio/Entrez/DTDs/eLink_090910.dtd | 79 + code/lib/Bio/Entrez/DTDs/eLink_101123.dtd | 88 + code/lib/Bio/Entrez/DTDs/ePost_020511.dtd | 14 + code/lib/Bio/Entrez/DTDs/eSearch_020511.dtd | 64 + code/lib/Bio/Entrez/DTDs/eSpell.dtd | 20 + code/lib/Bio/Entrez/DTDs/eSummary_041029.dtd | 20 + code/lib/Bio/Entrez/DTDs/egquery.dtd | 22 + code/lib/Bio/Entrez/DTDs/einfo.dtd | 62 + code/lib/Bio/Entrez/DTDs/elink_020122.dtd | 20 + code/lib/Bio/Entrez/DTDs/esearch.dtd | 103 + code/lib/Bio/Entrez/DTDs/esummary-v1.dtd | 20 + code/lib/Bio/Entrez/DTDs/format.ent | 412 + code/lib/Bio/Entrez/DTDs/htmltable.dtd | 334 + code/lib/Bio/Entrez/DTDs/isoamsa.ent | 167 + code/lib/Bio/Entrez/DTDs/isoamsb.ent | 143 + code/lib/Bio/Entrez/DTDs/isoamsc.ent | 43 + code/lib/Bio/Entrez/DTDs/isoamsn.ent | 114 + code/lib/Bio/Entrez/DTDs/isoamso.ent | 73 + code/lib/Bio/Entrez/DTDs/isoamsr.ent | 204 + code/lib/Bio/Entrez/DTDs/isobox.ent | 61 + code/lib/Bio/Entrez/DTDs/isocyr1.ent | 88 + code/lib/Bio/Entrez/DTDs/isocyr2.ent | 47 + code/lib/Bio/Entrez/DTDs/isodia.ent | 35 + code/lib/Bio/Entrez/DTDs/isogrk1.ent | 1 + code/lib/Bio/Entrez/DTDs/isogrk2.ent | 1 + 
code/lib/Bio/Entrez/DTDs/isogrk3.ent | 64 + code/lib/Bio/Entrez/DTDs/isogrk4.ent | 69 + code/lib/Bio/Entrez/DTDs/isolat1.ent | 83 + code/lib/Bio/Entrez/DTDs/isolat2.ent | 142 + code/lib/Bio/Entrez/DTDs/isomfrk.ent | 75 + code/lib/Bio/Entrez/DTDs/isomopf.ent | 49 + code/lib/Bio/Entrez/DTDs/isomscr.ent | 75 + code/lib/Bio/Entrez/DTDs/isonum.ent | 97 + code/lib/Bio/Entrez/DTDs/isopub.ent | 105 + code/lib/Bio/Entrez/DTDs/isotech.ent | 182 + code/lib/Bio/Entrez/DTDs/journalmeta.ent | 341 + code/lib/Bio/Entrez/DTDs/link.ent | 510 + code/lib/Bio/Entrez/DTDs/list.ent | 465 + code/lib/Bio/Entrez/DTDs/math.ent | 329 + code/lib/Bio/Entrez/DTDs/mathml-in-pubmed.mod | 151 + code/lib/Bio/Entrez/DTDs/mathml2-qname-1.mod | 1 + code/lib/Bio/Entrez/DTDs/mathml2.dtd | 1960 ++ code/lib/Bio/Entrez/DTDs/mathml3-qname1.mod | 294 + code/lib/Bio/Entrez/DTDs/mathml3.dtd | 1682 ++ code/lib/Bio/Entrez/DTDs/mathmlsetup.ent | 191 + code/lib/Bio/Entrez/DTDs/mmlalias.ent | 564 + code/lib/Bio/Entrez/DTDs/mmlextra.ent | 122 + code/lib/Bio/Entrez/DTDs/modules.ent | 417 + .../Bio/Entrez/DTDs/nlm-articleset-2.0.dtd | 271 + .../DTDs/nlmcatalogrecordset_170601.dtd | 280 + code/lib/Bio/Entrez/DTDs/nlmcommon_011101.dtd | 175 + code/lib/Bio/Entrez/DTDs/nlmcommon_080101.dtd | 201 + code/lib/Bio/Entrez/DTDs/nlmcommon_090101.dtd | 220 + .../lib/Bio/Entrez/DTDs/nlmmedline_011101.dtd | 60 + .../lib/Bio/Entrez/DTDs/nlmmedline_080101.dtd | 71 + .../lib/Bio/Entrez/DTDs/nlmmedline_090101.dtd | 74 + .../Entrez/DTDs/nlmmedlinecitation_011101.dtd | 178 + .../Entrez/DTDs/nlmmedlinecitation_080101.dtd | 107 + .../Entrez/DTDs/nlmmedlinecitation_090101.dtd | 112 + .../DTDs/nlmmedlinecitationset_100101.dtd | 194 + .../DTDs/nlmmedlinecitationset_100301.dtd | 201 + .../DTDs/nlmmedlinecitationset_110101.dtd | 197 + .../DTDs/nlmmedlinecitationset_120101.dtd | 188 + .../DTDs/nlmmedlinecitationset_130101.dtd | 191 + .../DTDs/nlmmedlinecitationset_130501.dtd | 191 + .../DTDs/nlmmedlinecitationset_140101.dtd | 190 + 
.../DTDs/nlmmedlinecitationset_150101.dtd | 189 + .../lib/Bio/Entrez/DTDs/nlmserials_080101.dtd | 134 + .../lib/Bio/Entrez/DTDs/nlmserials_100101.dtd | 157 + .../Entrez/DTDs/nlmsharedcatcit_080101.dtd | 80 + .../Entrez/DTDs/nlmsharedcatcit_090101.dtd | 80 + code/lib/Bio/Entrez/DTDs/notat.ent | 172 + code/lib/Bio/Entrez/DTDs/para.ent | 420 + code/lib/Bio/Entrez/DTDs/phrase.ent | 278 + code/lib/Bio/Entrez/DTDs/pmc-1.dtd | 900 + code/lib/Bio/Entrez/DTDs/pubmed_020114.dtd | 61 + code/lib/Bio/Entrez/DTDs/pubmed_080101.dtd | 71 + code/lib/Bio/Entrez/DTDs/pubmed_090101.dtd | 71 + code/lib/Bio/Entrez/DTDs/pubmed_100101.dtd | 72 + code/lib/Bio/Entrez/DTDs/pubmed_100301.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_110101.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_120101.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_130101.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_130501.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_140101.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_150101.dtd | 79 + code/lib/Bio/Entrez/DTDs/pubmed_180101.dtd | 434 + code/lib/Bio/Entrez/DTDs/pubmed_180601.dtd | 454 + code/lib/Bio/Entrez/DTDs/pubmed_190101.dtd | 478 + code/lib/Bio/Entrez/DTDs/references.ent | 726 + code/lib/Bio/Entrez/DTDs/section.ent | 220 + code/lib/Bio/Entrez/DTDs/taxon.dtd | 131 + code/lib/Bio/Entrez/DTDs/xmlspecchars.ent | 290 + code/lib/Bio/Entrez/Parser.py | 1005 + code/lib/Bio/Entrez/XSDs/IPGReportSet.xsd | 97 + code/lib/Bio/Entrez/__init__.py | 696 + .../Entrez/__pycache__/Parser.cpython-311.pyc | Bin 0 -> 43582 bytes .../Entrez/__pycache__/Parser.cpython-312.pyc | Bin 0 -> 42256 bytes .../Entrez/__pycache__/Parser.cpython-37.pyc | Bin 0 -> 26153 bytes .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 27743 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 26469 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 23699 bytes code/lib/Bio/ExPASy/Enzyme.py | 157 + code/lib/Bio/ExPASy/Prodoc.py | 173 + code/lib/Bio/ExPASy/Prosite.py | 314 + code/lib/Bio/ExPASy/ScanProsite.py | 145 
+ code/lib/Bio/ExPASy/__init__.py | 131 + .../ExPASy/__pycache__/Enzyme.cpython-37.pyc | Bin 0 -> 3672 bytes .../ExPASy/__pycache__/Prodoc.cpython-37.pyc | Bin 0 -> 4360 bytes .../ExPASy/__pycache__/Prosite.cpython-37.pyc | Bin 0 -> 7686 bytes .../__pycache__/ScanProsite.cpython-37.pyc | Bin 0 -> 4609 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4267 bytes .../__pycache__/cellosaurus.cpython-37.pyc | Bin 0 -> 5026 bytes code/lib/Bio/ExPASy/cellosaurus.py | 188 + code/lib/Bio/File.py | 609 + code/lib/Bio/GenBank/Record.py | 669 + code/lib/Bio/GenBank/Scanner.py | 1904 ++ code/lib/Bio/GenBank/__init__.py | 1746 ++ .../GenBank/__pycache__/Record.cpython-37.pyc | Bin 0 -> 19181 bytes .../__pycache__/Scanner.cpython-37.pyc | Bin 0 -> 37748 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 50901 bytes .../GenBank/__pycache__/utils.cpython-37.pyc | Bin 0 -> 2505 bytes code/lib/Bio/GenBank/utils.py | 68 + code/lib/Bio/Geo/Record.py | 92 + code/lib/Bio/Geo/__init__.py | 67 + .../Bio/Geo/__pycache__/Record.cpython-37.pyc | Bin 0 -> 2362 bytes .../Geo/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1419 bytes code/lib/Bio/Graphics/BasicChromosome.py | 823 + code/lib/Bio/Graphics/ColorSpiral.py | 206 + code/lib/Bio/Graphics/Comparative.py | 178 + .../lib/Bio/Graphics/DisplayRepresentation.py | 187 + code/lib/Bio/Graphics/Distribution.py | 258 + .../Graphics/GenomeDiagram/_AbstractDrawer.py | 565 + .../Graphics/GenomeDiagram/_CircularDrawer.py | 1725 ++ .../lib/Bio/Graphics/GenomeDiagram/_Colors.py | 234 + .../Bio/Graphics/GenomeDiagram/_CrossLink.py | 100 + .../Bio/Graphics/GenomeDiagram/_Diagram.py | 411 + .../Bio/Graphics/GenomeDiagram/_Feature.py | 198 + .../Bio/Graphics/GenomeDiagram/_FeatureSet.py | 210 + code/lib/Bio/Graphics/GenomeDiagram/_Graph.py | 195 + .../Bio/Graphics/GenomeDiagram/_GraphSet.py | 171 + .../Graphics/GenomeDiagram/_LinearDrawer.py | 1580 ++ code/lib/Bio/Graphics/GenomeDiagram/_Track.py | 285 + 
.../Bio/Graphics/GenomeDiagram/__init__.py | 37 + .../_AbstractDrawer.cpython-37.pyc | Bin 0 -> 13776 bytes .../_CircularDrawer.cpython-37.pyc | Bin 0 -> 30597 bytes .../__pycache__/_Colors.cpython-37.pyc | Bin 0 -> 8424 bytes .../__pycache__/_CrossLink.cpython-37.pyc | Bin 0 -> 2984 bytes .../__pycache__/_Diagram.cpython-37.pyc | Bin 0 -> 11833 bytes .../__pycache__/_Feature.cpython-37.pyc | Bin 0 -> 6290 bytes .../__pycache__/_FeatureSet.cpython-37.pyc | Bin 0 -> 5886 bytes .../__pycache__/_Graph.cpython-37.pyc | Bin 0 -> 5710 bytes .../__pycache__/_GraphSet.cpython-37.pyc | Bin 0 -> 5098 bytes .../__pycache__/_LinearDrawer.cpython-37.pyc | Bin 0 -> 27966 bytes .../__pycache__/_Track.cpython-37.pyc | Bin 0 -> 9660 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 636 bytes code/lib/Bio/Graphics/KGML_vis.py | 443 + code/lib/Bio/Graphics/__init__.py | 90 + .../BasicChromosome.cpython-37.pyc | Bin 0 -> 23682 bytes .../__pycache__/ColorSpiral.cpython-37.pyc | Bin 0 -> 6890 bytes .../__pycache__/Comparative.cpython-37.pyc | Bin 0 -> 5220 bytes .../DisplayRepresentation.cpython-37.pyc | Bin 0 -> 5835 bytes .../__pycache__/Distribution.cpython-37.pyc | Bin 0 -> 7449 bytes .../__pycache__/KGML_vis.cpython-37.pyc | Bin 0 -> 11135 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1874 bytes code/lib/Bio/HMM/DynamicProgramming.py | 326 + code/lib/Bio/HMM/MarkovModel.py | 677 + code/lib/Bio/HMM/Trainer.py | 430 + code/lib/Bio/HMM/Utilities.py | 68 + code/lib/Bio/HMM/__init__.py | 5 + .../DynamicProgramming.cpython-37.pyc | Bin 0 -> 8356 bytes .../__pycache__/MarkovModel.cpython-37.pyc | Bin 0 -> 19980 bytes .../HMM/__pycache__/Trainer.cpython-37.pyc | Bin 0 -> 11866 bytes .../HMM/__pycache__/Utilities.cpython-37.pyc | Bin 0 -> 1282 bytes .../HMM/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 213 bytes code/lib/Bio/KEGG/Compound/__init__.py | 175 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5575 bytes code/lib/Bio/KEGG/Enzyme/__init__.py | 328 + 
.../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 10928 bytes code/lib/Bio/KEGG/Gene/__init__.py | 140 + .../Gene/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4038 bytes code/lib/Bio/KEGG/KGML/KGML_parser.py | 189 + code/lib/Bio/KEGG/KGML/KGML_pathway.py | 859 + code/lib/Bio/KEGG/KGML/__init__.py | 16 + .../__pycache__/KGML_parser.cpython-37.pyc | Bin 0 -> 5540 bytes .../__pycache__/KGML_pathway.cpython-37.pyc | Bin 0 -> 29911 bytes .../KGML/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 387 bytes code/lib/Bio/KEGG/Map/__init__.py | 49 + .../Map/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1112 bytes code/lib/Bio/KEGG/REST.py | 315 + code/lib/Bio/KEGG/__init__.py | 97 + .../Bio/KEGG/__pycache__/REST.cpython-37.pyc | Bin 0 -> 5791 bytes .../KEGG/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 2344 bytes code/lib/Bio/LogisticRegression.py | 134 + code/lib/Bio/MarkovModel.py | 683 + code/lib/Bio/MaxEntropy.py | 340 + code/lib/Bio/Medline/__init__.py | 221 + .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 6265 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5071 bytes code/lib/Bio/NMR/NOEtools.py | 100 + code/lib/Bio/NMR/__init__.py | 11 + .../NMR/__pycache__/NOEtools.cpython-37.pyc | Bin 0 -> 2739 bytes .../NMR/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 327 bytes .../NMR/__pycache__/xpktools.cpython-37.pyc | Bin 0 -> 7715 bytes code/lib/Bio/NMR/xpktools.py | 315 + code/lib/Bio/NaiveBayes.py | 214 + code/lib/Bio/Nexus/Nexus.py | 2157 ++ code/lib/Bio/Nexus/Nodes.py | 182 + code/lib/Bio/Nexus/StandardData.py | 119 + code/lib/Bio/Nexus/Trees.py | 979 + code/lib/Bio/Nexus/__init__.py | 8 + .../Nexus/__pycache__/Nexus.cpython-37.pyc | Bin 0 -> 56429 bytes .../Nexus/__pycache__/Nodes.cpython-37.pyc | Bin 0 -> 6011 bytes .../__pycache__/StandardData.cpython-37.pyc | Bin 0 -> 2860 bytes .../Nexus/__pycache__/Trees.cpython-37.pyc | Bin 0 -> 26787 bytes .../Nexus/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 249 bytes code/lib/Bio/Nexus/cnexus.c | 128 + 
code/lib/Bio/Nexus/cnexus.cp37-win_amd64.pyd | Bin 0 -> 11264 bytes code/lib/Bio/PDB/AbstractPropertyMap.py | 130 + code/lib/Bio/PDB/Atom.py | 566 + code/lib/Bio/PDB/Chain.py | 213 + code/lib/Bio/PDB/DSSP.py | 620 + code/lib/Bio/PDB/Dice.py | 78 + code/lib/Bio/PDB/Entity.py | 499 + code/lib/Bio/PDB/FragmentMapper.py | 326 + code/lib/Bio/PDB/HSExposure.py | 327 + code/lib/Bio/PDB/MMCIF2Dict.py | 128 + code/lib/Bio/PDB/MMCIFParser.py | 544 + code/lib/Bio/PDB/Model.py | 74 + code/lib/Bio/PDB/NACCESS.py | 212 + code/lib/Bio/PDB/NeighborSearch.py | 130 + code/lib/Bio/PDB/PDBExceptions.py | 31 + code/lib/Bio/PDB/PDBIO.py | 394 + code/lib/Bio/PDB/PDBList.py | 579 + code/lib/Bio/PDB/PDBParser.py | 425 + code/lib/Bio/PDB/PICIO.py | 386 + code/lib/Bio/PDB/PSEA.py | 118 + code/lib/Bio/PDB/Polypeptide.py | 479 + code/lib/Bio/PDB/QCPSuperimposer/__init__.py | 165 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4892 bytes .../Bio/PDB/QCPSuperimposer/qcprotmodule.c | 250 + .../qcprotmodule.cp37-win_amd64.pyd | Bin 0 -> 14336 bytes code/lib/Bio/PDB/Residue.py | 161 + code/lib/Bio/PDB/ResidueDepth.py | 608 + code/lib/Bio/PDB/SASA.py | 251 + code/lib/Bio/PDB/SCADIO.py | 806 + code/lib/Bio/PDB/Selection.py | 84 + code/lib/Bio/PDB/Structure.py | 66 + code/lib/Bio/PDB/StructureAlignment.py | 97 + code/lib/Bio/PDB/StructureBuilder.py | 313 + code/lib/Bio/PDB/Superimposer.py | 57 + code/lib/Bio/PDB/__init__.py | 90 + .../AbstractPropertyMap.cpython-37.pyc | Bin 0 -> 4494 bytes .../Bio/PDB/__pycache__/Atom.cpython-37.pyc | Bin 0 -> 17412 bytes .../Bio/PDB/__pycache__/Chain.cpython-37.pyc | Bin 0 -> 6351 bytes .../Bio/PDB/__pycache__/DSSP.cpython-37.pyc | Bin 0 -> 12179 bytes .../Bio/PDB/__pycache__/Dice.cpython-37.pyc | Bin 0 -> 2327 bytes .../Bio/PDB/__pycache__/Entity.cpython-37.pyc | Bin 0 -> 15903 bytes .../__pycache__/FragmentMapper.cpython-37.pyc | Bin 0 -> 8944 bytes .../PDB/__pycache__/HSExposure.cpython-37.pyc | Bin 0 -> 8979 bytes 
.../PDB/__pycache__/MMCIF2Dict.cpython-37.pyc | Bin 0 -> 2347 bytes .../__pycache__/MMCIFParser.cpython-37.pyc | Bin 0 -> 12841 bytes .../Bio/PDB/__pycache__/Model.cpython-37.pyc | Bin 0 -> 2560 bytes .../PDB/__pycache__/NACCESS.cpython-37.pyc | Bin 0 -> 4946 bytes .../__pycache__/NeighborSearch.cpython-37.pyc | Bin 0 -> 3909 bytes .../__pycache__/PDBExceptions.cpython-37.pyc | Bin 0 -> 861 bytes .../Bio/PDB/__pycache__/PDBIO.cpython-37.pyc | Bin 0 -> 7649 bytes .../PDB/__pycache__/PDBList.cpython-37.pyc | Bin 0 -> 17069 bytes .../PDB/__pycache__/PDBParser.cpython-37.pyc | Bin 0 -> 8813 bytes .../Bio/PDB/__pycache__/PICIO.cpython-37.pyc | Bin 0 -> 9096 bytes .../Bio/PDB/__pycache__/PSEA.cpython-37.pyc | Bin 0 -> 2938 bytes .../__pycache__/Polypeptide.cpython-37.pyc | Bin 0 -> 11990 bytes .../PDB/__pycache__/Residue.cpython-37.pyc | Bin 0 -> 5022 bytes .../__pycache__/ResidueDepth.cpython-37.pyc | Bin 0 -> 13171 bytes .../Bio/PDB/__pycache__/SASA.cpython-37.pyc | Bin 0 -> 6986 bytes .../Bio/PDB/__pycache__/SCADIO.cpython-37.pyc | Bin 0 -> 29148 bytes .../PDB/__pycache__/Selection.cpython-37.pyc | Bin 0 -> 2422 bytes .../PDB/__pycache__/Structure.cpython-37.pyc | Bin 0 -> 2537 bytes .../StructureAlignment.cpython-37.pyc | Bin 0 -> 2407 bytes .../StructureBuilder.cpython-37.pyc | Bin 0 -> 7140 bytes .../__pycache__/Superimposer.cpython-37.pyc | Bin 0 -> 1918 bytes .../PDB/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1750 bytes .../PDB/__pycache__/ic_data.cpython-37.pyc | Bin 0 -> 8981 bytes .../PDB/__pycache__/ic_rebuild.cpython-37.pyc | Bin 0 -> 8719 bytes .../internal_coords.cpython-37.pyc | Bin 0 -> 85247 bytes .../PDB/__pycache__/mmcifio.cpython-37.pyc | Bin 0 -> 7914 bytes .../parse_pdb_header.cpython-37.pyc | Bin 0 -> 7175 bytes .../PDB/__pycache__/vectors.cpython-37.pyc | Bin 0 -> 15879 bytes code/lib/Bio/PDB/ic_data.py | 500 + code/lib/Bio/PDB/ic_rebuild.py | 381 + code/lib/Bio/PDB/internal_coords.py | 3492 +++ code/lib/Bio/PDB/kdtrees.c | 1461 ++ 
code/lib/Bio/PDB/kdtrees.cp37-win_amd64.pyd | Bin 0 -> 28160 bytes code/lib/Bio/PDB/mmcifio.py | 378 + code/lib/Bio/PDB/mmtf/DefaultParser.py | 235 + code/lib/Bio/PDB/mmtf/__init__.py | 50 + .../__pycache__/DefaultParser.cpython-37.pyc | Bin 0 -> 8336 bytes .../mmtf/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1649 bytes .../mmtf/__pycache__/mmtfio.cpython-37.pyc | Bin 0 -> 5875 bytes code/lib/Bio/PDB/mmtf/mmtfio.py | 257 + code/lib/Bio/PDB/parse_pdb_header.py | 328 + code/lib/Bio/PDB/vectors.py | 663 + code/lib/Bio/Pathway/Rep/Graph.py | 149 + code/lib/Bio/Pathway/Rep/MultiGraph.py | 196 + code/lib/Bio/Pathway/Rep/__init__.py | 14 + .../Rep/__pycache__/Graph.cpython-37.pyc | Bin 0 -> 5695 bytes .../Rep/__pycache__/MultiGraph.cpython-37.pyc | Bin 0 -> 6598 bytes .../Rep/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 454 bytes code/lib/Bio/Pathway/__init__.py | 314 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 11279 bytes code/lib/Bio/Phylo/Applications/_Fasttree.py | 598 + code/lib/Bio/Phylo/Applications/_Phyml.py | 289 + code/lib/Bio/Phylo/Applications/_Raxml.py | 404 + code/lib/Bio/Phylo/Applications/__init__.py | 19 + .../__pycache__/_Fasttree.cpython-37.pyc | Bin 0 -> 23570 bytes .../__pycache__/_Phyml.cpython-37.pyc | Bin 0 -> 8533 bytes .../__pycache__/_Raxml.cpython-37.pyc | Bin 0 -> 14971 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 571 bytes code/lib/Bio/Phylo/BaseTree.py | 1250 + code/lib/Bio/Phylo/CDAO.py | 50 + code/lib/Bio/Phylo/CDAOIO.py | 483 + code/lib/Bio/Phylo/Consensus.py | 640 + code/lib/Bio/Phylo/NeXML.py | 50 + code/lib/Bio/Phylo/NeXMLIO.py | 348 + code/lib/Bio/Phylo/Newick.py | 41 + code/lib/Bio/Phylo/NewickIO.py | 373 + code/lib/Bio/Phylo/NexusIO.py | 83 + code/lib/Bio/Phylo/PAML/__init__.py | 8 + .../PAML/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 197 bytes .../PAML/__pycache__/_paml.cpython-37.pyc | Bin 0 -> 4385 bytes .../__pycache__/_parse_baseml.cpython-37.pyc | Bin 0 -> 5001 bytes 
.../__pycache__/_parse_codeml.cpython-37.pyc | Bin 0 -> 9000 bytes .../__pycache__/_parse_yn00.cpython-37.pyc | Bin 0 -> 3512 bytes .../PAML/__pycache__/baseml.cpython-37.pyc | Bin 0 -> 5355 bytes .../PAML/__pycache__/chi2.cpython-37.pyc | Bin 0 -> 2950 bytes .../PAML/__pycache__/codeml.cpython-37.pyc | Bin 0 -> 6146 bytes .../PAML/__pycache__/yn00.cpython-37.pyc | Bin 0 -> 4049 bytes code/lib/Bio/Phylo/PAML/_paml.py | 136 + code/lib/Bio/Phylo/PAML/_parse_baseml.py | 262 + code/lib/Bio/Phylo/PAML/_parse_codeml.py | 481 + code/lib/Bio/Phylo/PAML/_parse_yn00.py | 154 + code/lib/Bio/Phylo/PAML/baseml.py | 202 + code/lib/Bio/Phylo/PAML/chi2.py | 136 + code/lib/Bio/Phylo/PAML/codeml.py | 214 + code/lib/Bio/Phylo/PAML/yn00.py | 145 + code/lib/Bio/Phylo/PhyloXML.py | 1587 ++ code/lib/Bio/Phylo/PhyloXMLIO.py | 950 + code/lib/Bio/Phylo/TreeConstruction.py | 1179 + code/lib/Bio/Phylo/__init__.py | 15 + .../Phylo/__pycache__/BaseTree.cpython-37.pyc | Bin 0 -> 39693 bytes .../Bio/Phylo/__pycache__/CDAO.cpython-37.pyc | Bin 0 -> 1361 bytes .../Phylo/__pycache__/CDAOIO.cpython-37.pyc | Bin 0 -> 10694 bytes .../__pycache__/Consensus.cpython-37.pyc | Bin 0 -> 21317 bytes .../Phylo/__pycache__/NeXML.cpython-37.pyc | Bin 0 -> 1380 bytes .../Phylo/__pycache__/NeXMLIO.cpython-37.pyc | Bin 0 -> 8396 bytes .../Phylo/__pycache__/Newick.cpython-37.pyc | Bin 0 -> 1308 bytes .../Phylo/__pycache__/NewickIO.cpython-37.pyc | Bin 0 -> 8355 bytes .../Phylo/__pycache__/NexusIO.cpython-37.pyc | Bin 0 -> 2686 bytes .../Phylo/__pycache__/PhyloXML.cpython-37.pyc | Bin 0 -> 48533 bytes .../__pycache__/PhyloXMLIO.cpython-37.pyc | Bin 0 -> 25388 bytes .../TreeConstruction.cpython-37.pyc | Bin 0 -> 32908 bytes .../Phylo/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 449 bytes .../__pycache__/_cdao_owl.cpython-37.pyc | Bin 0 -> 109074 bytes .../Bio/Phylo/__pycache__/_io.cpython-37.pyc | Bin 0 -> 2349 bytes .../Phylo/__pycache__/_utils.cpython-37.pyc | Bin 0 -> 14113 bytes code/lib/Bio/Phylo/_cdao_owl.py | 
2887 +++ code/lib/Bio/Phylo/_io.py | 86 + code/lib/Bio/Phylo/_utils.py | 504 + code/lib/Bio/PopGen/GenePop/Controller.py | 945 + code/lib/Bio/PopGen/GenePop/EasyController.py | 196 + code/lib/Bio/PopGen/GenePop/FileParser.py | 347 + .../lib/Bio/PopGen/GenePop/LargeFileParser.py | 118 + code/lib/Bio/PopGen/GenePop/__init__.py | 223 + .../__pycache__/Controller.cpython-37.pyc | Bin 0 -> 29291 bytes .../__pycache__/EasyController.cpython-37.pyc | Bin 0 -> 7285 bytes .../__pycache__/FileParser.cpython-37.pyc | Bin 0 -> 8656 bytes .../LargeFileParser.cpython-37.pyc | Bin 0 -> 3418 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 6361 bytes code/lib/Bio/PopGen/__init__.py | 8 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 235 bytes code/lib/Bio/Restriction/PrintFormat.py | 485 + code/lib/Bio/Restriction/Restriction.py | 2694 +++ .../Bio/Restriction/Restriction_Dictionary.py | 19915 ++++++++++++++++ code/lib/Bio/Restriction/__init__.py | 189 + .../__pycache__/PrintFormat.cpython-37.pyc | Bin 0 -> 14267 bytes .../__pycache__/Restriction.cpython-37.pyc | Bin 0 -> 84859 bytes .../Restriction_Dictionary.cpython-37.pyc | Bin 0 -> 159749 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1810 bytes code/lib/Bio/SCOP/Cla.py | 128 + code/lib/Bio/SCOP/Des.py | 89 + code/lib/Bio/SCOP/Dom.py | 76 + code/lib/Bio/SCOP/Hie.py | 106 + code/lib/Bio/SCOP/Raf.py | 320 + code/lib/Bio/SCOP/Residues.py | 91 + code/lib/Bio/SCOP/__init__.py | 967 + .../Bio/SCOP/__pycache__/Cla.cpython-37.pyc | Bin 0 -> 3780 bytes .../Bio/SCOP/__pycache__/Des.cpython-37.pyc | Bin 0 -> 2563 bytes .../Bio/SCOP/__pycache__/Dom.cpython-37.pyc | Bin 0 -> 2151 bytes .../Bio/SCOP/__pycache__/Hie.cpython-37.pyc | Bin 0 -> 2704 bytes .../Bio/SCOP/__pycache__/Raf.cpython-37.pyc | Bin 0 -> 9150 bytes .../SCOP/__pycache__/Residues.cpython-37.pyc | Bin 0 -> 2491 bytes .../SCOP/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 25701 bytes code/lib/Bio/SVDSuperimposer/__init__.py | 201 + 
.../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5755 bytes code/lib/Bio/SearchIO/BlastIO/__init__.py | 419 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 23808 bytes .../__pycache__/blast_tab.cpython-37.pyc | Bin 0 -> 18630 bytes .../__pycache__/blast_text.cpython-37.pyc | Bin 0 -> 3304 bytes .../__pycache__/blast_xml.cpython-37.pyc | Bin 0 -> 21649 bytes code/lib/Bio/SearchIO/BlastIO/blast_tab.py | 894 + code/lib/Bio/SearchIO/BlastIO/blast_text.py | 143 + code/lib/Bio/SearchIO/BlastIO/blast_xml.py | 987 + code/lib/Bio/SearchIO/BlatIO.py | 751 + code/lib/Bio/SearchIO/ExonerateIO/__init__.py | 252 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 12639 bytes .../__pycache__/_base.cpython-37.pyc | Bin 0 -> 13137 bytes .../exonerate_cigar.cpython-37.pyc | Bin 0 -> 3078 bytes .../__pycache__/exonerate_text.cpython-37.pyc | Bin 0 -> 11555 bytes .../exonerate_vulgar.cpython-37.pyc | Bin 0 -> 5090 bytes code/lib/Bio/SearchIO/ExonerateIO/_base.py | 534 + .../SearchIO/ExonerateIO/exonerate_cigar.py | 109 + .../SearchIO/ExonerateIO/exonerate_text.py | 540 + .../SearchIO/ExonerateIO/exonerate_vulgar.py | 219 + code/lib/Bio/SearchIO/FastaIO.py | 601 + code/lib/Bio/SearchIO/HHsuiteIO/__init__.py | 17 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 545 bytes .../__pycache__/hhsuite2_text.cpython-37.pyc | Bin 0 -> 7097 bytes .../Bio/SearchIO/HHsuiteIO/hhsuite2_text.py | 234 + code/lib/Bio/SearchIO/HmmerIO/__init__.py | 304 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 20242 bytes .../HmmerIO/__pycache__/_base.cpython-37.pyc | Bin 0 -> 1189 bytes .../__pycache__/hmmer2_text.cpython-37.pyc | Bin 0 -> 7957 bytes .../__pycache__/hmmer3_domtab.cpython-37.pyc | Bin 0 -> 8843 bytes .../__pycache__/hmmer3_tab.cpython-37.pyc | Bin 0 -> 8064 bytes .../__pycache__/hmmer3_text.cpython-37.pyc | Bin 0 -> 9235 bytes code/lib/Bio/SearchIO/HmmerIO/_base.py | 45 + code/lib/Bio/SearchIO/HmmerIO/hmmer2_text.py | 374 + .../lib/Bio/SearchIO/HmmerIO/hmmer3_domtab.py | 375 + 
code/lib/Bio/SearchIO/HmmerIO/hmmer3_tab.py | 335 + code/lib/Bio/SearchIO/HmmerIO/hmmer3_text.py | 436 + .../Bio/SearchIO/InterproscanIO/__init__.py | 96 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5020 bytes .../interproscan_xml.cpython-37.pyc | Bin 0 -> 4566 bytes .../InterproscanIO/interproscan_xml.py | 194 + code/lib/Bio/SearchIO/__init__.py | 684 + .../__pycache__/BlatIO.cpython-37.pyc | Bin 0 -> 25682 bytes .../__pycache__/FastaIO.cpython-37.pyc | Bin 0 -> 15972 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 27131 bytes .../__pycache__/_index.cpython-37.pyc | Bin 0 -> 1302 bytes .../__pycache__/_utils.cpython-37.pyc | Bin 0 -> 5799 bytes code/lib/Bio/SearchIO/_index.py | 34 + .../Bio/SearchIO/_legacy/NCBIStandalone.py | 1953 ++ .../lib/Bio/SearchIO/_legacy/ParserSupport.py | 380 + code/lib/Bio/SearchIO/_legacy/__init__.py | 5 + .../__pycache__/NCBIStandalone.cpython-37.pyc | Bin 0 -> 43639 bytes .../__pycache__/ParserSupport.cpython-37.pyc | Bin 0 -> 12170 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 256 bytes code/lib/Bio/SearchIO/_model/__init__.py | 59 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 2330 bytes .../_model/__pycache__/_base.cpython-37.pyc | Bin 0 -> 2395 bytes .../_model/__pycache__/hit.cpython-37.pyc | Bin 0 -> 14470 bytes .../_model/__pycache__/hsp.cpython-37.pyc | Bin 0 -> 41240 bytes .../_model/__pycache__/query.cpython-37.pyc | Bin 0 -> 27507 bytes code/lib/Bio/SearchIO/_model/_base.py | 68 + code/lib/Bio/SearchIO/_model/hit.py | 463 + code/lib/Bio/SearchIO/_model/hsp.py | 1230 + code/lib/Bio/SearchIO/_model/query.py | 743 + code/lib/Bio/SearchIO/_utils.py | 167 + code/lib/Bio/Seq.py | 3223 +++ code/lib/Bio/SeqFeature.py | 2224 ++ code/lib/Bio/SeqIO/AbiIO.py | 602 + code/lib/Bio/SeqIO/AceIO.py | 101 + code/lib/Bio/SeqIO/FastaIO.py | 426 + code/lib/Bio/SeqIO/GckIO.py | 230 + code/lib/Bio/SeqIO/IgIO.py | 128 + code/lib/Bio/SeqIO/InsdcIO.py | 1511 ++ code/lib/Bio/SeqIO/Interfaces.py | 376 + 
code/lib/Bio/SeqIO/NibIO.py | 170 + code/lib/Bio/SeqIO/PdbIO.py | 515 + code/lib/Bio/SeqIO/PhdIO.py | 158 + code/lib/Bio/SeqIO/PirIO.py | 292 + code/lib/Bio/SeqIO/QualityIO.py | 2297 ++ code/lib/Bio/SeqIO/SeqXmlIO.py | 669 + code/lib/Bio/SeqIO/SffIO.py | 1494 ++ code/lib/Bio/SeqIO/SnapGeneIO.py | 296 + code/lib/Bio/SeqIO/SwissIO.py | 142 + code/lib/Bio/SeqIO/TabIO.py | 139 + code/lib/Bio/SeqIO/TwoBitIO.py | 250 + code/lib/Bio/SeqIO/UniprotIO.py | 561 + code/lib/Bio/SeqIO/XdnaIO.py | 366 + code/lib/Bio/SeqIO/__init__.py | 1092 + .../SeqIO/__pycache__/AbiIO.cpython-37.pyc | Bin 0 -> 18225 bytes .../SeqIO/__pycache__/AceIO.cpython-37.pyc | Bin 0 -> 2996 bytes .../SeqIO/__pycache__/FastaIO.cpython-37.pyc | Bin 0 -> 13249 bytes .../SeqIO/__pycache__/GckIO.cpython-37.pyc | Bin 0 -> 4631 bytes .../Bio/SeqIO/__pycache__/IgIO.cpython-37.pyc | Bin 0 -> 3377 bytes .../SeqIO/__pycache__/InsdcIO.cpython-37.pyc | Bin 0 -> 31779 bytes .../__pycache__/Interfaces.cpython-37.pyc | Bin 0 -> 12031 bytes .../SeqIO/__pycache__/NibIO.cpython-37.pyc | Bin 0 -> 5635 bytes .../SeqIO/__pycache__/PdbIO.cpython-37.pyc | Bin 0 -> 14213 bytes .../SeqIO/__pycache__/PhdIO.cpython-37.pyc | Bin 0 -> 4781 bytes .../SeqIO/__pycache__/PirIO.cpython-37.pyc | Bin 0 -> 8769 bytes .../__pycache__/QualityIO.cpython-37.pyc | Bin 0 -> 80786 bytes .../SeqIO/__pycache__/SeqXmlIO.cpython-37.pyc | Bin 0 -> 18620 bytes .../SeqIO/__pycache__/SffIO.cpython-37.pyc | Bin 0 -> 35331 bytes .../__pycache__/SnapGeneIO.cpython-37.pyc | Bin 0 -> 7623 bytes .../SeqIO/__pycache__/SwissIO.cpython-37.pyc | Bin 0 -> 4006 bytes .../SeqIO/__pycache__/TabIO.cpython-37.pyc | Bin 0 -> 4991 bytes .../SeqIO/__pycache__/TwoBitIO.cpython-37.pyc | Bin 0 -> 6683 bytes .../__pycache__/UniprotIO.cpython-37.pyc | Bin 0 -> 14323 bytes .../SeqIO/__pycache__/XdnaIO.cpython-37.pyc | Bin 0 -> 8475 bytes .../SeqIO/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 36927 bytes .../SeqIO/__pycache__/_index.cpython-37.pyc | Bin 0 -> 16228 bytes 
code/lib/Bio/SeqIO/_index.py | 713 + code/lib/Bio/SeqIO/_twoBitIO.c | 480 + .../Bio/SeqIO/_twoBitIO.cp37-win_amd64.pyd | Bin 0 -> 14848 bytes code/lib/Bio/SeqRecord.py | 1372 ++ code/lib/Bio/SeqUtils/CheckSum.py | 145 + code/lib/Bio/SeqUtils/CodonUsage.py | 187 + code/lib/Bio/SeqUtils/CodonUsageIndices.py | 28 + code/lib/Bio/SeqUtils/IsoelectricPoint.py | 161 + code/lib/Bio/SeqUtils/MeltingTemp.py | 1136 + code/lib/Bio/SeqUtils/ProtParam.py | 356 + code/lib/Bio/SeqUtils/ProtParamData.py | 390 + code/lib/Bio/SeqUtils/__init__.py | 467 + .../__pycache__/CheckSum.cpython-37.pyc | Bin 0 -> 3183 bytes .../__pycache__/CodonUsage.cpython-37.pyc | Bin 0 -> 4954 bytes .../CodonUsageIndices.cpython-37.pyc | Bin 0 -> 1184 bytes .../IsoelectricPoint.cpython-37.pyc | Bin 0 -> 5604 bytes .../__pycache__/MeltingTemp.cpython-37.pyc | Bin 0 -> 37536 bytes .../__pycache__/ProtParam.cpython-37.pyc | Bin 0 -> 12795 bytes .../__pycache__/ProtParamData.cpython-37.pyc | Bin 0 -> 8069 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 13725 bytes .../SeqUtils/__pycache__/lcc.cpython-37.pyc | Bin 0 -> 3216 bytes code/lib/Bio/SeqUtils/lcc.py | 162 + code/lib/Bio/Sequencing/Ace.py | 594 + .../Bio/Sequencing/Applications/_Novoalign.py | 217 + .../Bio/Sequencing/Applications/__init__.py | 56 + .../__pycache__/_Novoalign.cpython-37.pyc | Bin 0 -> 6955 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 1497 bytes .../__pycache__/_bwa.cpython-37.pyc | Bin 0 -> 24713 bytes .../__pycache__/_samtools.cpython-37.pyc | Bin 0 -> 32408 bytes code/lib/Bio/Sequencing/Applications/_bwa.py | 640 + .../Bio/Sequencing/Applications/_samtools.py | 1035 + code/lib/Bio/Sequencing/Phd.py | 199 + code/lib/Bio/Sequencing/__init__.py | 12 + .../Sequencing/__pycache__/Ace.cpython-37.pyc | Bin 0 -> 13936 bytes .../Sequencing/__pycache__/Phd.cpython-37.pyc | Bin 0 -> 4226 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 414 bytes code/lib/Bio/SubsMat/FreqTable.py | 107 + 
code/lib/Bio/SubsMat/MatrixInfo.py | 2724 +++ code/lib/Bio/SubsMat/__init__.py | 600 + .../__pycache__/FreqTable.cpython-37.pyc | Bin 0 -> 2904 bytes .../__pycache__/MatrixInfo.cpython-37.pyc | Bin 0 -> 57568 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 17477 bytes code/lib/Bio/SwissProt/KeyWList.py | 91 + code/lib/Bio/SwissProt/__init__.py | 861 + .../__pycache__/KeyWList.cpython-37.pyc | Bin 0 -> 3001 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 20224 bytes code/lib/Bio/TogoWS/__init__.py | 376 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 11526 bytes code/lib/Bio/UniGene/__init__.py | 340 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 12121 bytes code/lib/Bio/UniProt/GOA.py | 497 + code/lib/Bio/UniProt/__init__.py | 17 + .../UniProt/__pycache__/GOA.cpython-37.pyc | Bin 0 -> 11358 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 544 bytes code/lib/Bio/Wise/__init__.py | 151 + .../Wise/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 4012 bytes .../Bio/Wise/__pycache__/dnal.cpython-37.pyc | Bin 0 -> 4426 bytes .../Bio/Wise/__pycache__/psw.cpython-37.pyc | Bin 0 -> 5032 bytes code/lib/Bio/Wise/dnal.py | 175 + code/lib/Bio/Wise/psw.py | 172 + code/lib/Bio/__init__.py | 129 + code/lib/Bio/__pycache__/File.cpython-37.pyc | Bin 0 -> 16268 bytes .../LogisticRegression.cpython-37.pyc | Bin 0 -> 3340 bytes .../__pycache__/MarkovModel.cpython-37.pyc | Bin 0 -> 16086 bytes .../Bio/__pycache__/MaxEntropy.cpython-37.pyc | Bin 0 -> 9390 bytes .../Bio/__pycache__/NaiveBayes.cpython-37.pyc | Bin 0 -> 5369 bytes code/lib/Bio/__pycache__/Seq.cpython-37.pyc | Bin 0 -> 105531 bytes .../Bio/__pycache__/SeqFeature.cpython-37.pyc | Bin 0 -> 75669 bytes .../Bio/__pycache__/SeqRecord.cpython-37.pyc | Bin 0 -> 45461 bytes .../Bio/__pycache__/__init__.cpython-311.pyc | Bin 0 -> 6038 bytes .../Bio/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 5570 bytes .../Bio/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5066 bytes 
.../lib/Bio/__pycache__/_utils.cpython-37.pyc | Bin 0 -> 1379 bytes code/lib/Bio/__pycache__/bgzf.cpython-37.pyc | Bin 0 -> 30332 bytes code/lib/Bio/__pycache__/kNN.cpython-37.pyc | Bin 0 -> 3846 bytes .../Bio/__pycache__/pairwise2.cpython-37.pyc | Bin 0 -> 32622 bytes code/lib/Bio/_utils.py | 70 + code/lib/Bio/bgzf.py | 920 + code/lib/Bio/codonalign/__init__.py | 810 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 16287 bytes .../__pycache__/chisq.cpython-37.pyc | Bin 0 -> 2870 bytes .../__pycache__/codonalignment.cpython-37.pyc | Bin 0 -> 14900 bytes .../__pycache__/codonseq.cpython-37.pyc | Bin 0 -> 33491 bytes code/lib/Bio/codonalign/chisq.py | 148 + code/lib/Bio/codonalign/codonalignment.py | 513 + code/lib/Bio/codonalign/codonseq.py | 1319 + code/lib/Bio/cpairwise2.cp37-win_amd64.pyd | Bin 0 -> 16384 bytes code/lib/Bio/cpairwise2module.c | 479 + code/lib/Bio/kNN.py | 138 + code/lib/Bio/motifs/__init__.py | 610 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 17825 bytes .../__pycache__/alignace.cpython-37.pyc | Bin 0 -> 1672 bytes .../__pycache__/clusterbuster.cpython-37.pyc | Bin 0 -> 2276 bytes .../motifs/__pycache__/mast.cpython-37.pyc | Bin 0 -> 4311 bytes .../motifs/__pycache__/matrix.cpython-37.pyc | Bin 0 -> 17101 bytes .../motifs/__pycache__/meme.cpython-37.pyc | Bin 0 -> 6421 bytes .../motifs/__pycache__/minimal.cpython-37.pyc | Bin 0 -> 4990 bytes .../Bio/motifs/__pycache__/pfm.cpython-37.pyc | Bin 0 -> 11662 bytes .../__pycache__/thresholds.cpython-37.pyc | Bin 0 -> 4092 bytes .../__pycache__/transfac.cpython-37.pyc | Bin 0 -> 7362 bytes .../Bio/motifs/__pycache__/xms.cpython-37.pyc | Bin 0 -> 3823 bytes code/lib/Bio/motifs/_pwm.c | 216 + code/lib/Bio/motifs/_pwm.cp37-win_amd64.pyd | Bin 0 -> 13824 bytes code/lib/Bio/motifs/alignace.py | 67 + code/lib/Bio/motifs/applications/__init__.py | 15 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 436 bytes .../__pycache__/_xxmotif.cpython-37.pyc | Bin 0 -> 7945 bytes 
code/lib/Bio/motifs/applications/_xxmotif.py | 261 + code/lib/Bio/motifs/clusterbuster.py | 80 + code/lib/Bio/motifs/jaspar/__init__.py | 372 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 9474 bytes .../jaspar/__pycache__/db.cpython-37.pyc | Bin 0 -> 17902 bytes code/lib/Bio/motifs/jaspar/db.py | 776 + code/lib/Bio/motifs/mast.py | 133 + code/lib/Bio/motifs/matrix.py | 550 + code/lib/Bio/motifs/meme.py | 195 + code/lib/Bio/motifs/minimal.py | 193 + code/lib/Bio/motifs/pfm.py | 413 + code/lib/Bio/motifs/thresholds.py | 109 + code/lib/Bio/motifs/transfac.py | 325 + code/lib/Bio/motifs/xms.py | 105 + code/lib/Bio/pairwise2.py | 1431 ++ code/lib/Bio/phenotype/__init__.py | 241 + .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 7430 bytes .../__pycache__/phen_micro.cpython-37.pyc | Bin 0 -> 32112 bytes .../__pycache__/pm_fitting.cpython-37.pyc | Bin 0 -> 3935 bytes code/lib/Bio/phenotype/phen_micro.py | 1207 + code/lib/Bio/phenotype/pm_fitting.py | 146 + .../Building_Literature_Embedding_Model.py | 303 + code/lib/CVD_risk_factor_search.py | 19 + code/lib/ExpCohort_Generator.py | 53 + code/lib/Literature_Data_Collection.py | 117 + code/lib/Literature_Data_Preprocessing.py | 347 + code/lib/Loading_PudMed.py | 229 + code/lib/ML_models.py | 540 + code/lib/Medline/__init__.py | 222 + .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 6266 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 5888 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 5102 bytes code/lib/Moon_gene2vec.py | 369 + ..._Literature_Embedding_Model.cpython-37.pyc | Bin 0 -> 9763 bytes .../CVD_risk_factor_search.cpython-37.pyc | Bin 0 -> 1043 bytes .../Intrisic_Evaluation.cpython-37.pyc | Bin 0 -> 1016 bytes ...Literature_Data_Collection.cpython-311.pyc | Bin 0 -> 7055 bytes ...Literature_Data_Collection.cpython-312.pyc | Bin 0 -> 6257 bytes .../Literature_Data_Collection.cpython-37.pyc | Bin 0 -> 3754 bytes ...erature_Data_Preprocessing.cpython-312.pyc | Bin 0 -> 15322 bytes 
...terature_Data_Preprocessing.cpython-37.pyc | Bin 0 -> 8292 bytes .../Loading_PudMed.cpython-311.pyc | Bin 0 -> 11086 bytes .../Loading_PudMed.cpython-312.pyc | Bin 0 -> 9876 bytes .../__pycache__/Loading_PudMed.cpython-37.pyc | Bin 0 -> 5147 bytes code/lib/__pycache__/ML_models.cpython-37.pyc | Bin 0 -> 15661 bytes .../__pycache__/Moon_gene2vec.cpython-37.pyc | Bin 0 -> 11002 bytes ...oading_literature_embedding.cpython-37.pyc | Bin 0 -> 8534 bytes ...tep4_CVD_risk_factor_search.cpython-37.pyc | Bin 0 -> 1025 bytes code/lib/loading_literature_embedding.py | 228 + code/lib/performance_metrics.py | 60 + code/read_me_images/model1_re.jpg | Bin 0 -> 11746 bytes code/read_me_images/model2_re.jpg | Bin 0 -> 13395 bytes code/read_me_images/model3.png | Bin 0 -> 41684 bytes code/read_me_images/model3_re.jpg | Bin 0 -> 13623 bytes code/read_me_images/preprocessing.png | Bin 0 -> 57403 bytes code/read_me_images/table_collection.png | Bin 0 -> 137847 bytes code/read_me_images/table_data.png | Bin 0 -> 41539 bytes code/read_me_images/table_pre.png | Bin 0 -> 23352 bytes code/run | 50 + code/step1_data_collection.py | 64 + code/step1_data_collection_Custom_Luis.py | 76 + code/step2_data_preprocessing.py | 63 + code/step3_literature_embedding_training.py | 57 + code/step4_CVD_risk_factor_identification.py | 35 + code/step_1_data_collection_Luis.py | 122 + code/step_1_data_collection_Luis_.py | 21 + environment/Dockerfile | 7 + error_log.txt | 371 + metadata/metadata.yml | 31 + 973 files changed, 245872 insertions(+) create mode 100644 .gitignore create mode 100644 REPRODUCING.md create mode 100644 code/Extrinsic_application_CVD_prediction.py create mode 100644 code/LICENSE create mode 100644 code/README.md create mode 100644 code/gene_extraction.py create mode 100644 code/lib/Bio/Affy/CelFile.py create mode 100644 code/lib/Bio/Affy/__init__.py create mode 100644 code/lib/Bio/Affy/__pycache__/CelFile.cpython-37.pyc create mode 100644 
code/lib/Bio/Affy/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Align/AlignInfo.py create mode 100644 code/lib/Bio/Align/Applications/_ClustalOmega.py create mode 100644 code/lib/Bio/Align/Applications/_Clustalw.py create mode 100644 code/lib/Bio/Align/Applications/_Dialign.py create mode 100644 code/lib/Bio/Align/Applications/_MSAProbs.py create mode 100644 code/lib/Bio/Align/Applications/_Mafft.py create mode 100644 code/lib/Bio/Align/Applications/_Muscle.py create mode 100644 code/lib/Bio/Align/Applications/_Prank.py create mode 100644 code/lib/Bio/Align/Applications/_Probcons.py create mode 100644 code/lib/Bio/Align/Applications/_TCoffee.py create mode 100644 code/lib/Bio/Align/Applications/__init__.py create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Dialign.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_MSAProbs.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Prank.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc create mode 100644 code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Align/__init__.py create mode 100644 code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc create mode 100644 code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Align/_aligners.c create mode 100644 code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/Align/substitution_matrices/__init__.py 
create mode 100644 code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BENNER22 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BENNER6 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BENNER74 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BLOSUM45 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/DAYHOFF create mode 100644 code/lib/Bio/Align/substitution_matrices/data/FENG create mode 100644 code/lib/Bio/Align/substitution_matrices/data/GENETIC create mode 100644 code/lib/Bio/Align/substitution_matrices/data/GONNET1992 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/HOXD70 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/JOHNSON create mode 100644 code/lib/Bio/Align/substitution_matrices/data/JONES create mode 100644 code/lib/Bio/Align/substitution_matrices/data/LEVIN create mode 100644 code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN create mode 100644 code/lib/Bio/Align/substitution_matrices/data/MDM78 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/PAM250 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/PAM30 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/PAM70 create mode 100644 code/lib/Bio/Align/substitution_matrices/data/RAO create mode 100644 code/lib/Bio/Align/substitution_matrices/data/RISLER create mode 100644 code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER create mode 100644 code/lib/Bio/Align/substitution_matrices/data/STR create mode 
100644 code/lib/Bio/Align/substitution_matrices/data/TRANS create mode 100644 code/lib/Bio/AlignIO/ClustalIO.py create mode 100644 code/lib/Bio/AlignIO/EmbossIO.py create mode 100644 code/lib/Bio/AlignIO/FastaIO.py create mode 100644 code/lib/Bio/AlignIO/Interfaces.py create mode 100644 code/lib/Bio/AlignIO/MafIO.py create mode 100644 code/lib/Bio/AlignIO/MauveIO.py create mode 100644 code/lib/Bio/AlignIO/MsfIO.py create mode 100644 code/lib/Bio/AlignIO/NexusIO.py create mode 100644 code/lib/Bio/AlignIO/PhylipIO.py create mode 100644 code/lib/Bio/AlignIO/StockholmIO.py create mode 100644 code/lib/Bio/AlignIO/__init__.py create mode 100644 code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/MafIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/PhylipIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc create mode 100644 code/lib/Bio/AlignIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Alphabet/__init__.py create mode 100644 code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Application/__init__.py create mode 100644 code/lib/Bio/Application/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/Applications.py create mode 100644 code/lib/Bio/Blast/NCBIWWW.py create mode 100644 code/lib/Bio/Blast/NCBIXML.py create mode 100644 code/lib/Bio/Blast/ParseBlastTable.py create mode 100644 code/lib/Bio/Blast/Record.py create mode 
100644 code/lib/Bio/Blast/__init__.py create mode 100644 code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/__pycache__/NCBIWWW.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/__pycache__/NCBIXML.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/__pycache__/ParseBlastTable.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/__pycache__/Record.cpython-37.pyc create mode 100644 code/lib/Bio/Blast/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/CAPS/__init__.py create mode 100644 code/lib/Bio/CAPS/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Cluster/__init__.py create mode 100644 code/lib/Bio/Cluster/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Cluster/_cluster.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/Cluster/cluster.c create mode 100644 code/lib/Bio/Cluster/cluster.h create mode 100644 code/lib/Bio/Cluster/clustermodule.c create mode 100644 code/lib/Bio/Compass/__init__.py create mode 100644 code/lib/Bio/Compass/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Data/CodonTable.py create mode 100644 code/lib/Bio/Data/IUPACData.py create mode 100644 code/lib/Bio/Data/SCOPData.py create mode 100644 code/lib/Bio/Data/__init__.py create mode 100644 code/lib/Bio/Data/__pycache__/CodonTable.cpython-37.pyc create mode 100644 code/lib/Bio/Data/__pycache__/IUPACData.cpython-37.pyc create mode 100644 code/lib/Bio/Data/__pycache__/SCOPData.cpython-37.pyc create mode 100644 code/lib/Bio/Data/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Emboss/Applications.py create mode 100644 code/lib/Bio/Emboss/Primer3.py create mode 100644 code/lib/Bio/Emboss/PrimerSearch.py create mode 100644 code/lib/Bio/Emboss/__init__.py create mode 100644 code/lib/Bio/Emboss/__pycache__/Applications.cpython-37.pyc create mode 100644 code/lib/Bio/Emboss/__pycache__/Primer3.cpython-37.pyc create mode 100644 
code/lib/Bio/Emboss/__pycache__/PrimerSearch.cpython-37.pyc create mode 100644 code/lib/Bio/Emboss/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_0.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_0.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_1.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_1.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_2.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_2.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_3.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_3.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_4.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/Docsum_3_4.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/EMBL_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/EMBL_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/GenBank_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/GenBank_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/HomoloGene.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/HomoloGene.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/INSD_INSDSeq.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Chemical_graph.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Features.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Features.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/MMDB_Structural_model.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Access.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Access.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Biblio.dtd create mode 100644 
code/lib/Bio/Entrez/DTDs/NCBI_Biblio.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BioSource.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BioSource.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BioTree.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BioTree.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Blast4.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Blast4.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BlastDL.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_BlastOutput.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Cdd.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Cdd.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Cn3d.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Entity.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Entrez2.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Entrezgene.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_FeatDef.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_GBSeq.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Gene.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Gene.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ID1Access.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ID2Access.mod.dtd 
create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_MedArchive.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Medlars.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Medlars.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Medline.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Medline.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Mim.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Mim.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Mime.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Mime.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ObjPrt.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Organism.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Organism.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PCAssay.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PCSubstance.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Project.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Project.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Protein.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Protein.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Pub.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Pub.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PubMed.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_PubMed.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_RNA.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_RNA.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Remap.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Remap.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Rsite.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Rsite.mod.dtd create 
mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ScoreMat.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_ScoreMat.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_SeqCode.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_SeqCode.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_SeqTable.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_SeqTable.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seq_split.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seq_split.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqalign.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqalign.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqfeat.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqfeat.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqloc.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqloc.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqres.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqres.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqset.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Seqset.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Sequence.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Sequence.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Submit.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Submit.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Systems.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_TSeq.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_TSeq.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_TxInit.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_TxInit.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Variation.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_Variation.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NCBI_all.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NSE.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/NSE.mod.dtd create mode 
100644 code/lib/Bio/Entrez/DTDs/OMSSA.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/OMSSA.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PDB_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PDB_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PIR_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PIR_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PRF_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/PRF_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/SP_General.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/SP_General.mod.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/XHTMLtablesetup.ent create mode 100644 code/lib/Bio/Entrez/DTDs/archivearticle.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/archivecustom-classes.ent create mode 100644 code/lib/Bio/Entrez/DTDs/archivecustom-mixes.ent create mode 100644 code/lib/Bio/Entrez/DTDs/archivecustom-models.ent create mode 100644 code/lib/Bio/Entrez/DTDs/archivecustom-modules.ent create mode 100644 code/lib/Bio/Entrez/DTDs/articlemeta.ent create mode 100644 code/lib/Bio/Entrez/DTDs/backmatter.ent create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_100301.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_110101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_120101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_130101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_140101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/bookdoc_150101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/chars.ent create mode 100644 code/lib/Bio/Entrez/DTDs/common.ent create mode 100644 code/lib/Bio/Entrez/DTDs/default-classes.ent create mode 100644 code/lib/Bio/Entrez/DTDs/default-mixes.ent create mode 100644 code/lib/Bio/Entrez/DTDs/display.ent create mode 100644 code/lib/Bio/Entrez/DTDs/eInfo_020511.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/eLink_090910.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/eLink_101123.dtd create mode 100644 
code/lib/Bio/Entrez/DTDs/ePost_020511.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/eSearch_020511.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/eSpell.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/eSummary_041029.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/egquery.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/einfo.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/elink_020122.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/esearch.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/esummary-v1.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/format.ent create mode 100644 code/lib/Bio/Entrez/DTDs/htmltable.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/isoamsa.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isoamsb.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isoamsc.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isoamsn.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isoamso.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isoamsr.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isobox.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isocyr1.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isocyr2.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isodia.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isogrk1.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isogrk2.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isogrk3.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isogrk4.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isolat1.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isolat2.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isomfrk.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isomopf.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isomscr.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isonum.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isopub.ent create mode 100644 code/lib/Bio/Entrez/DTDs/isotech.ent create mode 100644 code/lib/Bio/Entrez/DTDs/journalmeta.ent create mode 100644 code/lib/Bio/Entrez/DTDs/link.ent create mode 100644 
code/lib/Bio/Entrez/DTDs/list.ent create mode 100644 code/lib/Bio/Entrez/DTDs/math.ent create mode 100644 code/lib/Bio/Entrez/DTDs/mathml-in-pubmed.mod create mode 100644 code/lib/Bio/Entrez/DTDs/mathml2-qname-1.mod create mode 100644 code/lib/Bio/Entrez/DTDs/mathml2.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/mathml3-qname1.mod create mode 100644 code/lib/Bio/Entrez/DTDs/mathml3.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/mathmlsetup.ent create mode 100644 code/lib/Bio/Entrez/DTDs/mmlalias.ent create mode 100644 code/lib/Bio/Entrez/DTDs/mmlextra.ent create mode 100644 code/lib/Bio/Entrez/DTDs/modules.ent create mode 100644 code/lib/Bio/Entrez/DTDs/nlm-articleset-2.0.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmcatalogrecordset_170601.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmcommon_011101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmcommon_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmcommon_090101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedline_011101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedline_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedline_090101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_011101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitation_090101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_100301.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_110101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_120101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_130501.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_140101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmmedlinecitationset_150101.dtd 
create mode 100644 code/lib/Bio/Entrez/DTDs/nlmserials_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmserials_100101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/nlmsharedcatcit_090101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/notat.ent create mode 100644 code/lib/Bio/Entrez/DTDs/para.ent create mode 100644 code/lib/Bio/Entrez/DTDs/phrase.ent create mode 100644 code/lib/Bio/Entrez/DTDs/pmc-1.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_020114.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_080101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_090101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_100101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_100301.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_110101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_120101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_130101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_130501.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_140101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_150101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_180101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_180601.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/pubmed_190101.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/references.ent create mode 100644 code/lib/Bio/Entrez/DTDs/section.ent create mode 100644 code/lib/Bio/Entrez/DTDs/taxon.dtd create mode 100644 code/lib/Bio/Entrez/DTDs/xmlspecchars.ent create mode 100644 code/lib/Bio/Entrez/Parser.py create mode 100644 code/lib/Bio/Entrez/XSDs/IPGReportSet.xsd create mode 100644 code/lib/Bio/Entrez/__init__.py create mode 100644 code/lib/Bio/Entrez/__pycache__/Parser.cpython-311.pyc create mode 100644 code/lib/Bio/Entrez/__pycache__/Parser.cpython-312.pyc create mode 100644 code/lib/Bio/Entrez/__pycache__/Parser.cpython-37.pyc create mode 100644 
code/lib/Bio/Entrez/__pycache__/__init__.cpython-311.pyc create mode 100644 code/lib/Bio/Entrez/__pycache__/__init__.cpython-312.pyc create mode 100644 code/lib/Bio/Entrez/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/Enzyme.py create mode 100644 code/lib/Bio/ExPASy/Prodoc.py create mode 100644 code/lib/Bio/ExPASy/Prosite.py create mode 100644 code/lib/Bio/ExPASy/ScanProsite.py create mode 100644 code/lib/Bio/ExPASy/__init__.py create mode 100644 code/lib/Bio/ExPASy/__pycache__/Enzyme.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/__pycache__/Prodoc.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/__pycache__/Prosite.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/__pycache__/ScanProsite.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/__pycache__/cellosaurus.cpython-37.pyc create mode 100644 code/lib/Bio/ExPASy/cellosaurus.py create mode 100644 code/lib/Bio/File.py create mode 100644 code/lib/Bio/GenBank/Record.py create mode 100644 code/lib/Bio/GenBank/Scanner.py create mode 100644 code/lib/Bio/GenBank/__init__.py create mode 100644 code/lib/Bio/GenBank/__pycache__/Record.cpython-37.pyc create mode 100644 code/lib/Bio/GenBank/__pycache__/Scanner.cpython-37.pyc create mode 100644 code/lib/Bio/GenBank/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/GenBank/__pycache__/utils.cpython-37.pyc create mode 100644 code/lib/Bio/GenBank/utils.py create mode 100644 code/lib/Bio/Geo/Record.py create mode 100644 code/lib/Bio/Geo/__init__.py create mode 100644 code/lib/Bio/Geo/__pycache__/Record.cpython-37.pyc create mode 100644 code/lib/Bio/Geo/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/BasicChromosome.py create mode 100644 code/lib/Bio/Graphics/ColorSpiral.py create mode 100644 code/lib/Bio/Graphics/Comparative.py create mode 100644 code/lib/Bio/Graphics/DisplayRepresentation.py create mode 100644 
code/lib/Bio/Graphics/Distribution.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_CircularDrawer.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_Colors.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_CrossLink.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_Diagram.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_Feature.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_FeatureSet.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_Graph.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_GraphSet.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_LinearDrawer.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/_Track.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__init__.py create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_AbstractDrawer.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CircularDrawer.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Colors.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_CrossLink.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Diagram.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Feature.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_FeatureSet.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Graph.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_GraphSet.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_LinearDrawer.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/_Track.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/GenomeDiagram/__pycache__/__init__.cpython-37.pyc create mode 100644 
code/lib/Bio/Graphics/KGML_vis.py create mode 100644 code/lib/Bio/Graphics/__init__.py create mode 100644 code/lib/Bio/Graphics/__pycache__/BasicChromosome.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/ColorSpiral.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/Comparative.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/DisplayRepresentation.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/Distribution.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/KGML_vis.cpython-37.pyc create mode 100644 code/lib/Bio/Graphics/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/HMM/DynamicProgramming.py create mode 100644 code/lib/Bio/HMM/MarkovModel.py create mode 100644 code/lib/Bio/HMM/Trainer.py create mode 100644 code/lib/Bio/HMM/Utilities.py create mode 100644 code/lib/Bio/HMM/__init__.py create mode 100644 code/lib/Bio/HMM/__pycache__/DynamicProgramming.cpython-37.pyc create mode 100644 code/lib/Bio/HMM/__pycache__/MarkovModel.cpython-37.pyc create mode 100644 code/lib/Bio/HMM/__pycache__/Trainer.cpython-37.pyc create mode 100644 code/lib/Bio/HMM/__pycache__/Utilities.cpython-37.pyc create mode 100644 code/lib/Bio/HMM/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/Compound/__init__.py create mode 100644 code/lib/Bio/KEGG/Compound/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/Enzyme/__init__.py create mode 100644 code/lib/Bio/KEGG/Enzyme/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/Gene/__init__.py create mode 100644 code/lib/Bio/KEGG/Gene/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/KGML/KGML_parser.py create mode 100644 code/lib/Bio/KEGG/KGML/KGML_pathway.py create mode 100644 code/lib/Bio/KEGG/KGML/__init__.py create mode 100644 code/lib/Bio/KEGG/KGML/__pycache__/KGML_parser.cpython-37.pyc create mode 100644 
code/lib/Bio/KEGG/KGML/__pycache__/KGML_pathway.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/KGML/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/Map/__init__.py create mode 100644 code/lib/Bio/KEGG/Map/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/REST.py create mode 100644 code/lib/Bio/KEGG/__init__.py create mode 100644 code/lib/Bio/KEGG/__pycache__/REST.cpython-37.pyc create mode 100644 code/lib/Bio/KEGG/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/LogisticRegression.py create mode 100644 code/lib/Bio/MarkovModel.py create mode 100644 code/lib/Bio/MaxEntropy.py create mode 100644 code/lib/Bio/Medline/__init__.py create mode 100644 code/lib/Bio/Medline/__pycache__/__init__.cpython-311.pyc create mode 100644 code/lib/Bio/Medline/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/NMR/NOEtools.py create mode 100644 code/lib/Bio/NMR/__init__.py create mode 100644 code/lib/Bio/NMR/__pycache__/NOEtools.cpython-37.pyc create mode 100644 code/lib/Bio/NMR/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/NMR/__pycache__/xpktools.cpython-37.pyc create mode 100644 code/lib/Bio/NMR/xpktools.py create mode 100644 code/lib/Bio/NaiveBayes.py create mode 100644 code/lib/Bio/Nexus/Nexus.py create mode 100644 code/lib/Bio/Nexus/Nodes.py create mode 100644 code/lib/Bio/Nexus/StandardData.py create mode 100644 code/lib/Bio/Nexus/Trees.py create mode 100644 code/lib/Bio/Nexus/__init__.py create mode 100644 code/lib/Bio/Nexus/__pycache__/Nexus.cpython-37.pyc create mode 100644 code/lib/Bio/Nexus/__pycache__/Nodes.cpython-37.pyc create mode 100644 code/lib/Bio/Nexus/__pycache__/StandardData.cpython-37.pyc create mode 100644 code/lib/Bio/Nexus/__pycache__/Trees.cpython-37.pyc create mode 100644 code/lib/Bio/Nexus/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Nexus/cnexus.c create mode 100644 code/lib/Bio/Nexus/cnexus.cp37-win_amd64.pyd create mode 
100644 code/lib/Bio/PDB/AbstractPropertyMap.py create mode 100644 code/lib/Bio/PDB/Atom.py create mode 100644 code/lib/Bio/PDB/Chain.py create mode 100644 code/lib/Bio/PDB/DSSP.py create mode 100644 code/lib/Bio/PDB/Dice.py create mode 100644 code/lib/Bio/PDB/Entity.py create mode 100644 code/lib/Bio/PDB/FragmentMapper.py create mode 100644 code/lib/Bio/PDB/HSExposure.py create mode 100644 code/lib/Bio/PDB/MMCIF2Dict.py create mode 100644 code/lib/Bio/PDB/MMCIFParser.py create mode 100644 code/lib/Bio/PDB/Model.py create mode 100644 code/lib/Bio/PDB/NACCESS.py create mode 100644 code/lib/Bio/PDB/NeighborSearch.py create mode 100644 code/lib/Bio/PDB/PDBExceptions.py create mode 100644 code/lib/Bio/PDB/PDBIO.py create mode 100644 code/lib/Bio/PDB/PDBList.py create mode 100644 code/lib/Bio/PDB/PDBParser.py create mode 100644 code/lib/Bio/PDB/PICIO.py create mode 100644 code/lib/Bio/PDB/PSEA.py create mode 100644 code/lib/Bio/PDB/Polypeptide.py create mode 100644 code/lib/Bio/PDB/QCPSuperimposer/__init__.py create mode 100644 code/lib/Bio/PDB/QCPSuperimposer/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/QCPSuperimposer/qcprotmodule.c create mode 100644 code/lib/Bio/PDB/QCPSuperimposer/qcprotmodule.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/PDB/Residue.py create mode 100644 code/lib/Bio/PDB/ResidueDepth.py create mode 100644 code/lib/Bio/PDB/SASA.py create mode 100644 code/lib/Bio/PDB/SCADIO.py create mode 100644 code/lib/Bio/PDB/Selection.py create mode 100644 code/lib/Bio/PDB/Structure.py create mode 100644 code/lib/Bio/PDB/StructureAlignment.py create mode 100644 code/lib/Bio/PDB/StructureBuilder.py create mode 100644 code/lib/Bio/PDB/Superimposer.py create mode 100644 code/lib/Bio/PDB/__init__.py create mode 100644 code/lib/Bio/PDB/__pycache__/AbstractPropertyMap.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Atom.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Chain.cpython-37.pyc create mode 100644 
code/lib/Bio/PDB/__pycache__/DSSP.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Dice.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Entity.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/FragmentMapper.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/HSExposure.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/MMCIF2Dict.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/MMCIFParser.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Model.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/NACCESS.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/NeighborSearch.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PDBExceptions.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PDBIO.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PDBList.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PDBParser.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PICIO.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/PSEA.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Polypeptide.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Residue.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/ResidueDepth.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/SASA.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/SCADIO.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Selection.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Structure.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/StructureAlignment.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/StructureBuilder.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/Superimposer.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/__init__.cpython-37.pyc create mode 100644 
code/lib/Bio/PDB/__pycache__/ic_data.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/ic_rebuild.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/internal_coords.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/mmcifio.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/parse_pdb_header.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/__pycache__/vectors.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/ic_data.py create mode 100644 code/lib/Bio/PDB/ic_rebuild.py create mode 100644 code/lib/Bio/PDB/internal_coords.py create mode 100644 code/lib/Bio/PDB/kdtrees.c create mode 100644 code/lib/Bio/PDB/kdtrees.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/PDB/mmcifio.py create mode 100644 code/lib/Bio/PDB/mmtf/DefaultParser.py create mode 100644 code/lib/Bio/PDB/mmtf/__init__.py create mode 100644 code/lib/Bio/PDB/mmtf/__pycache__/DefaultParser.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/mmtf/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/mmtf/__pycache__/mmtfio.cpython-37.pyc create mode 100644 code/lib/Bio/PDB/mmtf/mmtfio.py create mode 100644 code/lib/Bio/PDB/parse_pdb_header.py create mode 100644 code/lib/Bio/PDB/vectors.py create mode 100644 code/lib/Bio/Pathway/Rep/Graph.py create mode 100644 code/lib/Bio/Pathway/Rep/MultiGraph.py create mode 100644 code/lib/Bio/Pathway/Rep/__init__.py create mode 100644 code/lib/Bio/Pathway/Rep/__pycache__/Graph.cpython-37.pyc create mode 100644 code/lib/Bio/Pathway/Rep/__pycache__/MultiGraph.cpython-37.pyc create mode 100644 code/lib/Bio/Pathway/Rep/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Pathway/__init__.py create mode 100644 code/lib/Bio/Pathway/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/Applications/_Fasttree.py create mode 100644 code/lib/Bio/Phylo/Applications/_Phyml.py create mode 100644 code/lib/Bio/Phylo/Applications/_Raxml.py create mode 100644 
code/lib/Bio/Phylo/Applications/__init__.py create mode 100644 code/lib/Bio/Phylo/Applications/__pycache__/_Fasttree.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/Applications/__pycache__/_Phyml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/Applications/__pycache__/_Raxml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/Applications/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/BaseTree.py create mode 100644 code/lib/Bio/Phylo/CDAO.py create mode 100644 code/lib/Bio/Phylo/CDAOIO.py create mode 100644 code/lib/Bio/Phylo/Consensus.py create mode 100644 code/lib/Bio/Phylo/NeXML.py create mode 100644 code/lib/Bio/Phylo/NeXMLIO.py create mode 100644 code/lib/Bio/Phylo/Newick.py create mode 100644 code/lib/Bio/Phylo/NewickIO.py create mode 100644 code/lib/Bio/Phylo/NexusIO.py create mode 100644 code/lib/Bio/Phylo/PAML/__init__.py create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/_paml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/_parse_baseml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/_parse_codeml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/_parse_yn00.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/baseml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/chi2.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/codeml.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/__pycache__/yn00.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/PAML/_paml.py create mode 100644 code/lib/Bio/Phylo/PAML/_parse_baseml.py create mode 100644 code/lib/Bio/Phylo/PAML/_parse_codeml.py create mode 100644 code/lib/Bio/Phylo/PAML/_parse_yn00.py create mode 100644 code/lib/Bio/Phylo/PAML/baseml.py create mode 100644 code/lib/Bio/Phylo/PAML/chi2.py create mode 100644 code/lib/Bio/Phylo/PAML/codeml.py create mode 100644 
code/lib/Bio/Phylo/PAML/yn00.py create mode 100644 code/lib/Bio/Phylo/PhyloXML.py create mode 100644 code/lib/Bio/Phylo/PhyloXMLIO.py create mode 100644 code/lib/Bio/Phylo/TreeConstruction.py create mode 100644 code/lib/Bio/Phylo/__init__.py create mode 100644 code/lib/Bio/Phylo/__pycache__/BaseTree.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/CDAO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/CDAOIO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/Consensus.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/NeXML.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/NeXMLIO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/Newick.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/NewickIO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/NexusIO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/PhyloXML.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/PhyloXMLIO.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/TreeConstruction.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/_cdao_owl.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/_io.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/__pycache__/_utils.cpython-37.pyc create mode 100644 code/lib/Bio/Phylo/_cdao_owl.py create mode 100644 code/lib/Bio/Phylo/_io.py create mode 100644 code/lib/Bio/Phylo/_utils.py create mode 100644 code/lib/Bio/PopGen/GenePop/Controller.py create mode 100644 code/lib/Bio/PopGen/GenePop/EasyController.py create mode 100644 code/lib/Bio/PopGen/GenePop/FileParser.py create mode 100644 code/lib/Bio/PopGen/GenePop/LargeFileParser.py create mode 100644 code/lib/Bio/PopGen/GenePop/__init__.py create mode 100644 code/lib/Bio/PopGen/GenePop/__pycache__/Controller.cpython-37.pyc create mode 100644 
code/lib/Bio/PopGen/GenePop/__pycache__/EasyController.cpython-37.pyc create mode 100644 code/lib/Bio/PopGen/GenePop/__pycache__/FileParser.cpython-37.pyc create mode 100644 code/lib/Bio/PopGen/GenePop/__pycache__/LargeFileParser.cpython-37.pyc create mode 100644 code/lib/Bio/PopGen/GenePop/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/PopGen/__init__.py create mode 100644 code/lib/Bio/PopGen/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Restriction/PrintFormat.py create mode 100644 code/lib/Bio/Restriction/Restriction.py create mode 100644 code/lib/Bio/Restriction/Restriction_Dictionary.py create mode 100644 code/lib/Bio/Restriction/__init__.py create mode 100644 code/lib/Bio/Restriction/__pycache__/PrintFormat.cpython-37.pyc create mode 100644 code/lib/Bio/Restriction/__pycache__/Restriction.cpython-37.pyc create mode 100644 code/lib/Bio/Restriction/__pycache__/Restriction_Dictionary.cpython-37.pyc create mode 100644 code/lib/Bio/Restriction/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/Cla.py create mode 100644 code/lib/Bio/SCOP/Des.py create mode 100644 code/lib/Bio/SCOP/Dom.py create mode 100644 code/lib/Bio/SCOP/Hie.py create mode 100644 code/lib/Bio/SCOP/Raf.py create mode 100644 code/lib/Bio/SCOP/Residues.py create mode 100644 code/lib/Bio/SCOP/__init__.py create mode 100644 code/lib/Bio/SCOP/__pycache__/Cla.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/Des.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/Dom.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/Hie.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/Raf.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/Residues.cpython-37.pyc create mode 100644 code/lib/Bio/SCOP/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SVDSuperimposer/__init__.py create mode 100644 code/lib/Bio/SVDSuperimposer/__pycache__/__init__.cpython-37.pyc create mode 
100644 code/lib/Bio/SearchIO/BlastIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/BlastIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/BlastIO/__pycache__/blast_tab.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/BlastIO/__pycache__/blast_text.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/BlastIO/__pycache__/blast_xml.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/BlastIO/blast_tab.py create mode 100644 code/lib/Bio/SearchIO/BlastIO/blast_text.py create mode 100644 code/lib/Bio/SearchIO/BlastIO/blast_xml.py create mode 100644 code/lib/Bio/SearchIO/BlatIO.py create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__pycache__/_base.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_cigar.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_text.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/__pycache__/exonerate_vulgar.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/_base.py create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/exonerate_cigar.py create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/exonerate_text.py create mode 100644 code/lib/Bio/SearchIO/ExonerateIO/exonerate_vulgar.py create mode 100644 code/lib/Bio/SearchIO/FastaIO.py create mode 100644 code/lib/Bio/SearchIO/HHsuiteIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HHsuiteIO/__pycache__/hhsuite2_text.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HHsuiteIO/hhsuite2_text.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__pycache__/__init__.cpython-37.pyc create mode 100644 
code/lib/Bio/SearchIO/HmmerIO/__pycache__/_base.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer2_text.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_domtab.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_tab.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HmmerIO/__pycache__/hmmer3_text.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/HmmerIO/_base.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/hmmer2_text.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/hmmer3_domtab.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/hmmer3_tab.py create mode 100644 code/lib/Bio/SearchIO/HmmerIO/hmmer3_text.py create mode 100644 code/lib/Bio/SearchIO/InterproscanIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/InterproscanIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/InterproscanIO/__pycache__/interproscan_xml.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/InterproscanIO/interproscan_xml.py create mode 100644 code/lib/Bio/SearchIO/__init__.py create mode 100644 code/lib/Bio/SearchIO/__pycache__/BlatIO.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/__pycache__/FastaIO.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/__pycache__/_index.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/__pycache__/_utils.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_index.py create mode 100644 code/lib/Bio/SearchIO/_legacy/NCBIStandalone.py create mode 100644 code/lib/Bio/SearchIO/_legacy/ParserSupport.py create mode 100644 code/lib/Bio/SearchIO/_legacy/__init__.py create mode 100644 code/lib/Bio/SearchIO/_legacy/__pycache__/NCBIStandalone.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_legacy/__pycache__/ParserSupport.cpython-37.pyc create mode 100644 
code/lib/Bio/SearchIO/_legacy/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/__init__.py create mode 100644 code/lib/Bio/SearchIO/_model/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/__pycache__/_base.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/__pycache__/hit.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/__pycache__/hsp.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/__pycache__/query.cpython-37.pyc create mode 100644 code/lib/Bio/SearchIO/_model/_base.py create mode 100644 code/lib/Bio/SearchIO/_model/hit.py create mode 100644 code/lib/Bio/SearchIO/_model/hsp.py create mode 100644 code/lib/Bio/SearchIO/_model/query.py create mode 100644 code/lib/Bio/SearchIO/_utils.py create mode 100644 code/lib/Bio/Seq.py create mode 100644 code/lib/Bio/SeqFeature.py create mode 100644 code/lib/Bio/SeqIO/AbiIO.py create mode 100644 code/lib/Bio/SeqIO/AceIO.py create mode 100644 code/lib/Bio/SeqIO/FastaIO.py create mode 100644 code/lib/Bio/SeqIO/GckIO.py create mode 100644 code/lib/Bio/SeqIO/IgIO.py create mode 100644 code/lib/Bio/SeqIO/InsdcIO.py create mode 100644 code/lib/Bio/SeqIO/Interfaces.py create mode 100644 code/lib/Bio/SeqIO/NibIO.py create mode 100644 code/lib/Bio/SeqIO/PdbIO.py create mode 100644 code/lib/Bio/SeqIO/PhdIO.py create mode 100644 code/lib/Bio/SeqIO/PirIO.py create mode 100644 code/lib/Bio/SeqIO/QualityIO.py create mode 100644 code/lib/Bio/SeqIO/SeqXmlIO.py create mode 100644 code/lib/Bio/SeqIO/SffIO.py create mode 100644 code/lib/Bio/SeqIO/SnapGeneIO.py create mode 100644 code/lib/Bio/SeqIO/SwissIO.py create mode 100644 code/lib/Bio/SeqIO/TabIO.py create mode 100644 code/lib/Bio/SeqIO/TwoBitIO.py create mode 100644 code/lib/Bio/SeqIO/UniprotIO.py create mode 100644 code/lib/Bio/SeqIO/XdnaIO.py create mode 100644 code/lib/Bio/SeqIO/__init__.py create mode 100644 code/lib/Bio/SeqIO/__pycache__/AbiIO.cpython-37.pyc create mode 100644 
code/lib/Bio/SeqIO/__pycache__/AceIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/FastaIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/GckIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/IgIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/InsdcIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/Interfaces.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/NibIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/PdbIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/PhdIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/PirIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/QualityIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/SeqXmlIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/SffIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/SnapGeneIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/SwissIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/TabIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/TwoBitIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/UniprotIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/XdnaIO.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/__pycache__/_index.cpython-37.pyc create mode 100644 code/lib/Bio/SeqIO/_index.py create mode 100644 code/lib/Bio/SeqIO/_twoBitIO.c create mode 100644 code/lib/Bio/SeqIO/_twoBitIO.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/SeqRecord.py create mode 100644 code/lib/Bio/SeqUtils/CheckSum.py create mode 100644 code/lib/Bio/SeqUtils/CodonUsage.py create mode 100644 code/lib/Bio/SeqUtils/CodonUsageIndices.py create mode 100644 code/lib/Bio/SeqUtils/IsoelectricPoint.py create mode 100644 
code/lib/Bio/SeqUtils/MeltingTemp.py create mode 100644 code/lib/Bio/SeqUtils/ProtParam.py create mode 100644 code/lib/Bio/SeqUtils/ProtParamData.py create mode 100644 code/lib/Bio/SeqUtils/__init__.py create mode 100644 code/lib/Bio/SeqUtils/__pycache__/CheckSum.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/CodonUsage.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/CodonUsageIndices.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/IsoelectricPoint.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/MeltingTemp.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/ProtParam.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/ProtParamData.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/__pycache__/lcc.cpython-37.pyc create mode 100644 code/lib/Bio/SeqUtils/lcc.py create mode 100644 code/lib/Bio/Sequencing/Ace.py create mode 100644 code/lib/Bio/Sequencing/Applications/_Novoalign.py create mode 100644 code/lib/Bio/Sequencing/Applications/__init__.py create mode 100644 code/lib/Bio/Sequencing/Applications/__pycache__/_Novoalign.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/Applications/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/Applications/__pycache__/_bwa.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/Applications/__pycache__/_samtools.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/Applications/_bwa.py create mode 100644 code/lib/Bio/Sequencing/Applications/_samtools.py create mode 100644 code/lib/Bio/Sequencing/Phd.py create mode 100644 code/lib/Bio/Sequencing/__init__.py create mode 100644 code/lib/Bio/Sequencing/__pycache__/Ace.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/__pycache__/Phd.cpython-37.pyc create mode 100644 code/lib/Bio/Sequencing/__pycache__/__init__.cpython-37.pyc create mode 
100644 code/lib/Bio/SubsMat/FreqTable.py create mode 100644 code/lib/Bio/SubsMat/MatrixInfo.py create mode 100644 code/lib/Bio/SubsMat/__init__.py create mode 100644 code/lib/Bio/SubsMat/__pycache__/FreqTable.cpython-37.pyc create mode 100644 code/lib/Bio/SubsMat/__pycache__/MatrixInfo.cpython-37.pyc create mode 100644 code/lib/Bio/SubsMat/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/SwissProt/KeyWList.py create mode 100644 code/lib/Bio/SwissProt/__init__.py create mode 100644 code/lib/Bio/SwissProt/__pycache__/KeyWList.cpython-37.pyc create mode 100644 code/lib/Bio/SwissProt/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/TogoWS/__init__.py create mode 100644 code/lib/Bio/TogoWS/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/UniGene/__init__.py create mode 100644 code/lib/Bio/UniGene/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/UniProt/GOA.py create mode 100644 code/lib/Bio/UniProt/__init__.py create mode 100644 code/lib/Bio/UniProt/__pycache__/GOA.cpython-37.pyc create mode 100644 code/lib/Bio/UniProt/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Wise/__init__.py create mode 100644 code/lib/Bio/Wise/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/Wise/__pycache__/dnal.cpython-37.pyc create mode 100644 code/lib/Bio/Wise/__pycache__/psw.cpython-37.pyc create mode 100644 code/lib/Bio/Wise/dnal.py create mode 100644 code/lib/Bio/Wise/psw.py create mode 100644 code/lib/Bio/__init__.py create mode 100644 code/lib/Bio/__pycache__/File.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/LogisticRegression.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/MarkovModel.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/MaxEntropy.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/NaiveBayes.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/Seq.cpython-37.pyc create mode 100644 
code/lib/Bio/__pycache__/SeqFeature.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/SeqRecord.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/__init__.cpython-311.pyc create mode 100644 code/lib/Bio/__pycache__/__init__.cpython-312.pyc create mode 100644 code/lib/Bio/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/_utils.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/bgzf.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/kNN.cpython-37.pyc create mode 100644 code/lib/Bio/__pycache__/pairwise2.cpython-37.pyc create mode 100644 code/lib/Bio/_utils.py create mode 100644 code/lib/Bio/bgzf.py create mode 100644 code/lib/Bio/codonalign/__init__.py create mode 100644 code/lib/Bio/codonalign/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/codonalign/__pycache__/chisq.cpython-37.pyc create mode 100644 code/lib/Bio/codonalign/__pycache__/codonalignment.cpython-37.pyc create mode 100644 code/lib/Bio/codonalign/__pycache__/codonseq.cpython-37.pyc create mode 100644 code/lib/Bio/codonalign/chisq.py create mode 100644 code/lib/Bio/codonalign/codonalignment.py create mode 100644 code/lib/Bio/codonalign/codonseq.py create mode 100644 code/lib/Bio/cpairwise2.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/cpairwise2module.c create mode 100644 code/lib/Bio/kNN.py create mode 100644 code/lib/Bio/motifs/__init__.py create mode 100644 code/lib/Bio/motifs/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/alignace.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/clusterbuster.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/mast.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/matrix.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/meme.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/minimal.cpython-37.pyc create mode 100644 
code/lib/Bio/motifs/__pycache__/pfm.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/thresholds.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/transfac.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/__pycache__/xms.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/_pwm.c create mode 100644 code/lib/Bio/motifs/_pwm.cp37-win_amd64.pyd create mode 100644 code/lib/Bio/motifs/alignace.py create mode 100644 code/lib/Bio/motifs/applications/__init__.py create mode 100644 code/lib/Bio/motifs/applications/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/applications/__pycache__/_xxmotif.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/applications/_xxmotif.py create mode 100644 code/lib/Bio/motifs/clusterbuster.py create mode 100644 code/lib/Bio/motifs/jaspar/__init__.py create mode 100644 code/lib/Bio/motifs/jaspar/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/jaspar/__pycache__/db.cpython-37.pyc create mode 100644 code/lib/Bio/motifs/jaspar/db.py create mode 100644 code/lib/Bio/motifs/mast.py create mode 100644 code/lib/Bio/motifs/matrix.py create mode 100644 code/lib/Bio/motifs/meme.py create mode 100644 code/lib/Bio/motifs/minimal.py create mode 100644 code/lib/Bio/motifs/pfm.py create mode 100644 code/lib/Bio/motifs/thresholds.py create mode 100644 code/lib/Bio/motifs/transfac.py create mode 100644 code/lib/Bio/motifs/xms.py create mode 100644 code/lib/Bio/pairwise2.py create mode 100644 code/lib/Bio/phenotype/__init__.py create mode 100644 code/lib/Bio/phenotype/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Bio/phenotype/__pycache__/phen_micro.cpython-37.pyc create mode 100644 code/lib/Bio/phenotype/__pycache__/pm_fitting.cpython-37.pyc create mode 100644 code/lib/Bio/phenotype/phen_micro.py create mode 100644 code/lib/Bio/phenotype/pm_fitting.py create mode 100644 code/lib/Building_Literature_Embedding_Model.py create mode 100644 
code/lib/CVD_risk_factor_search.py create mode 100644 code/lib/ExpCohort_Generator.py create mode 100644 code/lib/Literature_Data_Collection.py create mode 100644 code/lib/Literature_Data_Preprocessing.py create mode 100644 code/lib/Loading_PudMed.py create mode 100644 code/lib/ML_models.py create mode 100644 code/lib/Medline/__init__.py create mode 100644 code/lib/Medline/__pycache__/__init__.cpython-311.pyc create mode 100644 code/lib/Medline/__pycache__/__init__.cpython-312.pyc create mode 100644 code/lib/Medline/__pycache__/__init__.cpython-37.pyc create mode 100644 code/lib/Moon_gene2vec.py create mode 100644 code/lib/__pycache__/Building_Literature_Embedding_Model.cpython-37.pyc create mode 100644 code/lib/__pycache__/CVD_risk_factor_search.cpython-37.pyc create mode 100644 code/lib/__pycache__/Intrisic_Evaluation.cpython-37.pyc create mode 100644 code/lib/__pycache__/Literature_Data_Collection.cpython-311.pyc create mode 100644 code/lib/__pycache__/Literature_Data_Collection.cpython-312.pyc create mode 100644 code/lib/__pycache__/Literature_Data_Collection.cpython-37.pyc create mode 100644 code/lib/__pycache__/Literature_Data_Preprocessing.cpython-312.pyc create mode 100644 code/lib/__pycache__/Literature_Data_Preprocessing.cpython-37.pyc create mode 100644 code/lib/__pycache__/Loading_PudMed.cpython-311.pyc create mode 100644 code/lib/__pycache__/Loading_PudMed.cpython-312.pyc create mode 100644 code/lib/__pycache__/Loading_PudMed.cpython-37.pyc create mode 100644 code/lib/__pycache__/ML_models.cpython-37.pyc create mode 100644 code/lib/__pycache__/Moon_gene2vec.cpython-37.pyc create mode 100644 code/lib/__pycache__/loading_literature_embedding.cpython-37.pyc create mode 100644 code/lib/__pycache__/step4_CVD_risk_factor_search.cpython-37.pyc create mode 100644 code/lib/loading_literature_embedding.py create mode 100644 code/lib/performance_metrics.py create mode 100644 code/read_me_images/model1_re.jpg create mode 100644 code/read_me_images/model2_re.jpg 
create mode 100644 code/read_me_images/model3.png create mode 100644 code/read_me_images/model3_re.jpg create mode 100644 code/read_me_images/preprocessing.png create mode 100644 code/read_me_images/table_collection.png create mode 100644 code/read_me_images/table_data.png create mode 100644 code/read_me_images/table_pre.png create mode 100644 code/run create mode 100644 code/step1_data_collection.py create mode 100644 code/step1_data_collection_Custom_Luis.py create mode 100644 code/step2_data_preprocessing.py create mode 100644 code/step3_literature_embedding_training.py create mode 100644 code/step4_CVD_risk_factor_identification.py create mode 100644 code/step_1_data_collection_Luis.py create mode 100644 code/step_1_data_collection_Luis_.py create mode 100644 environment/Dockerfile create mode 100644 error_log.txt create mode 100644 metadata/metadata.yml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e0116c1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ + +data/gene_name_info/query_full_name.txt +data/gene_name_info/query_ids.txt +data/gene_name_info/query_snps.txt +data/gene_name_info/query_symbol.txt +results/baseline_doc/pubmed.zinc.0.15.txt +results/baseline_doc/pubmed.zinc.1.15.txt diff --git a/REPRODUCING.md b/REPRODUCING.md new file mode 100644 index 0000000..3146537 --- /dev/null +++ b/REPRODUCING.md @@ -0,0 +1,36 @@ +This [Code Ocean](https://codeocean.com) Compute Capsule will allow you to reproduce the results published by the author on your local machine1. Follow the instructions below, or consult [our knowledge base](https://help.codeocean.com/user-manual/sharing-and-finding-published-capsules/exporting-capsules-and-reproducing-results-on-your-local-machine) for more information. Don't hesitate to reach out to [Support](mailto:support@codeocean.com) if you have any questions. + +1 You may need access to additional hardware and/or software licenses. 
+ +# Prerequisites + +- [Docker Community Edition (CE)](https://www.docker.com/community-edition) +- [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#gpu) for code that leverages the GPU +- MATLAB/MOSEK/Stata licenses where applicable + +# Instructions + +## The computational environment (Docker image) + +This capsule is private and its environment cannot be downloaded at this time. You will need to rebuild the environment locally. + +> If there's any software requiring a license that needs to be run during the build stage, you'll need to make your license available. See [our knowledge base](https://help.codeocean.com/user-manual/sharing-and-finding-published-capsules/exporting-capsules-and-reproducing-results-on-your-local-machine) for more information. + +In your terminal, navigate to the folder where you've extracted the capsule and execute the following command: +```shell +cd environment && docker build . --tag 6ef700ed-ff07-4a42-bf13-65d4165511b6; cd .. +``` + +> This step will recreate the environment (i.e., the Docker image) locally, fetching and installing any required dependencies in the process. If any external resources have become unavailable for any reason, the environment will fail to build. 
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 30 21:59:06 2022

@author: Jihye Moon

Extrinsic evaluation script: uses the pre-trained literature embedding as a
feature selector (FS) and as a dimensionality reducer (DR) on cohort data,
then trains/evaluates the ML predictors on 5 stratified shuffle splits.

The functions below rely on module-level objects created at the bottom of
this file (``emb2simi``, ``words_list``, ``syn0norm``, ``fs``, ``dr``,
``pr``, ``output_path``), so the file is meant to be run as a script.
"""
import sys
import os
import pathlib

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit as strata

import lib.ML_models as ml
# 'lib' must be on sys.path before the next import, which is resolved as a
# top-level module rather than through the ``lib`` package.
sys.path.append('lib')
import loading_literature_embedding as emb


def data_split(X_train_index, X_test_index, X, y):
    """Split, flatten and standardise one fold.

    The held-out indices are cut in two: the first ``ceil(len/2)`` entries
    become the test set and the remaining ``floor(len/2)`` the validation
    set. Every feature matrix is reshaped to 2-D (samples x everything
    else) and scaled with a ``StandardScaler`` fitted on the training part
    only; labels are squeezed.

    Args:
        X_train_index: indices of the training samples.
        X_test_index: indices of the held-out samples (test + validation).
        X: feature array indexable by the index arrays above.
        y: label array indexable by the same indices.

    Returns:
        ``(X_train, X_test, X_valid, y_train, y_test, y_valid)``.
    """
    n_valid = len(X_test_index) // 2
    n_test = len(X_test_index) - n_valid
    test = X_test_index[:n_test]
    valid = X_test_index[n_test:n_test + n_valid]

    def _flatten(arr):
        # Keep the sample axis, collapse every other axis into one.
        return np.reshape(arr, (arr.shape[0], -1))

    X_train = _flatten(X[X_train_index])
    X_test = _flatten(X[test])
    X_valid = _flatten(X[valid])

    y_train = np.squeeze(y[X_train_index])
    y_test = np.squeeze(y[test])
    y_valid = np.squeeze(y[valid])

    # Fit the scaler on training data only so no held-out statistics leak in.
    scaler = StandardScaler()
    scaler.fit(X_train)
    return (scaler.transform(X_train), scaler.transform(X_test),
            scaler.transform(X_valid), y_train, y_test, y_valid)


def _read_column(csv_path):
    """Return column '0' of a CSV (after dropping 'Unnamed: 0') as a list."""
    return list(pd.read_csv(csv_path).drop(columns='Unnamed: 0')['0'])


def loading_variable_embedding(data_path):
    """Map cohort variables and target variables onto the literature embedding.

    Reads the variable / target-variable symbol and preprocessed-name CSV
    files found in ``data_path``, builds symbol -> name dictionaries and
    passes them to ``emb2simi.variable2embed`` together with the
    module-level ``words_list`` and ``syn0norm``.

    Args:
        data_path: directory holding ``variables_symbol.csv``,
            ``variables_preprocessed_names.csv``,
            ``target_variables_symbol.csv`` and
            ``target_variables_preprocessed_names.csv``.

    Returns:
        A 9-tuple ``(embedding_list, variables_indexing,
        disease_variables_indexing, additional_dictionary,
        target_embedding_list, index2target, index2variables,
        target_embedding, embedding)``. When ``variable2embed`` reports a
        non-empty ``removal`` list for the cohort variables, nine zeros are
        returned instead.
    """
    var_symbol = _read_column(data_path+'/variables_symbol.csv')
    var_name = _read_column(data_path+'/variables_preprocessed_names.csv')
    tar_symbol = _read_column(data_path+'/target_variables_symbol.csv')
    tar_name = _read_column(data_path+'/target_variables_preprocessed_names.csv')

    variables_indexing = {}
    for idx, name in enumerate(var_name):
        variables_indexing[var_symbol[idx]] = name

    disease_variables_indexing = {}
    for idx, name in enumerate(tar_name):
        disease_variables_indexing[tar_symbol[idx]] = name

    additional_dictionary = {'uricosurics':'uricosuric'}
    # If some variable names are very unique that can't find in embedding vocabulary,
    # add the unique variable names here to avoid error for feature selection tasks

    embedding_list, index2variables, embedding, removal, removed_words = emb2simi.variable2embed(
        words_list, syn0norm, variables_indexing, additional_dictionary)

    if removal==[]:
        print(" === NO problem for your variables")
        target_embedding_list, index2target, target_embedding, _, _ = emb2simi.variable2embed(
            words_list, syn0norm, disease_variables_indexing, additional_dictionary)

        return embedding_list, variables_indexing, disease_variables_indexing, additional_dictionary, \
            target_embedding_list, index2target, index2variables, target_embedding, embedding
    else:
        print(" === Check if there are errors for your variable names")
        return 0, 0, 0, 0, 0, 0, 0, 0, 0


def CVD_Prediction_with_FS_DR(data_path, Xt, y):
    """Run the FS- and DR-based prediction pipelines on 5 stratified splits.

    For every split the function creates ``output_path/<fold>``, saves the
    test labels, selects ``feature_size`` (128) variables with ``fs.Our_FS``,
    projects the full feature set with ``dr.Our_DR`` and runs
    ``pr.run_save`` once on the selected features and once on the reduced
    features.

    Args:
        data_path: directory with the variable-name CSV files (forwarded to
            ``loading_variable_embedding``).
        Xt: cohort feature ``DataFrame`` (columns are variable names).
        y: label array aligned with ``Xt``.

    Returns:
        ``(total_FS_Pre, total_FS_prob, total_DR_pre, total_DR_prob)``: four
        lists with one entry per fold, holding the predictions and
        probabilities returned by ``pr.run_save`` for the FS and the DR
        pipeline respectively.
    """
    feature_size = 128
    split_info = strata(n_splits=5, test_size=0.2, random_state=12)
    total_FS_Pre = []
    total_FS_prob = []
    total_DR_pre = []
    total_DR_prob = []
    (embedding_list, variables_indexing, disease_variables_indexing,
     additional_dictionary, target_embedding_list, index2target,
     index2variables, target_embedding, embedding) = loading_variable_embedding(data_path)

    for fold, (X_train_index, X_test_index) in enumerate(split_info.split(Xt.values, y)):
        result_dir = os.path.join(output_path, str(fold))
        pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)
        X_train, X_test, X_valid, y_train, y_test, y_valid = data_split(
            X_train_index, X_test_index, Xt.values, y)
        # y_test labels to evaluate CVD prediction performance for each fold
        pr.save_label(y_test, 'CVD_label', result_dir)

        print("=== run Our feature selector --- our FS selected features via feature name , our FS uses same feature set for 5-fold cross validation. ")
        embed_name = fs.Our_FS(
            emb2simi, str(fold)+'rf_embedding_features', embedding_list,
            variables_indexing, disease_variables_indexing, additional_dictionary,
            embedding, target_embedding_list, index2target, index2variables,
            target_embedding, feature_size, result_dir)

        print("=== run Our dimensionality reductor ")
        A1, A2, A3 = dr.Our_DR(embedding, X_train, X_test, X_valid, feature_size)

        print("=== Running with MLs with Feature Selection (Our FS)")
        # Keep only the variables chosen by the feature selector, then apply
        # exactly the same split / flatten / scaling as for the full matrix.
        X2 = Xt[embed_name].values
        X_train2, X_test2, X_valid2, _, _, _ = data_split(
            X_train_index, X_test_index, X2, y)

        Our_FS_total_prediction, Our_FS_total_prob = pr.run_save(
            X_train2, y_train, X_test2, y_test, X_valid2, y_valid,
            'FS.embedding', 'SMOTE', feature_size, result_dir)
        total_FS_Pre.append(Our_FS_total_prediction)
        total_FS_prob.append(Our_FS_total_prob)

        print("=== Running MLs with Dimensionality Reduction (Our DR)")
        Our_DR_total_prediction, Our_DR_total_prob = pr.run_save(
            A1, y_train, A2, y_test, A3, y_valid,
            'DR.embedding', 'SMOTE', feature_size, result_dir)
        # Fix: the DR lists previously received the FS results by mistake.
        total_DR_pre.append(Our_DR_total_prediction)
        total_DR_prob.append(Our_DR_total_prob)

    print('all results are saved in ', output_path)
    return total_FS_Pre, total_FS_prob, total_DR_pre, total_DR_prob


data_path = '../data/Example'
model_path = '../data/old_model'
output_path = '../results/prediction/'

fs = ml.feature_selectors()
dr = ml.dimension_reducers()
pr = ml.predictors()

gene_name = '../data/gene_name_info/query_full_name'; gene_symb='../data/gene_name_info/query_symbol'
emb2simi=emb.embedding_vector()

words_list, index2word, syn0norm, _ = emb2simi.setting(model_path, gene_symb)

Xt = pd.read_csv(data_path+'/Example_X.csv').drop(columns='Unnamed: 0')
y = pd.read_csv(data_path+'/Example_y.csv').drop(columns='Unnamed: 0').values

total_FS_Pre, total_FS_prob, total_DR_pre, total_DR_prob = CVD_Prediction_with_FS_DR(data_path, Xt, y)
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/code/README.md b/code/README.md new file mode 100644 index 0000000..c87cead --- /dev/null +++ b/code/README.md @@ -0,0 +1,266 @@ +#### **A Literature Embedding Model for Cardiovascular Disease Prediction using Risk Factors, Symptoms, and Genotype Information** +##### Authors: Jihye Moon, Hugo F. Posada-Quintero, and *Ki. H. Chon +**Contact address**: ki.chon@uconn.edu (*Corresponding author), jihye.moon@uconn.edu (Q&A for code). +(Accepted by Expert System with Application at August 24, 2022) + +### Contents + +This capsule aims to provide the implementation of 1) **Literature data collection and preprocessing**, and 2) **Literature embedding model training and evaluation**. The pre-trained Literature embedding model aims to identify CVD risk factors and associated information for a given input query (i.e., stroke). Also, since our literature embedding model contains representations for CVD-related words, our literature model can work as **Feature Selection (FS) and Dimensionality Reduction (DR) models on cohort data** for CVD prediction/classification tasks (extrinsic method). We used MESA cohort data consisting of 6,814 subjects and 564 variables in our manuscript. Since our cohort data requires permission, users are required to prepare their cohort data to use the literature embedding model as FS or DR tasks. The cohort data are required to have variables per subject and the variables' name. This capsule's guideline also provides a pipeline for the FS and DR for input cohort data. + +1. 
[Introduction](#introduction) +2. [Code Implementations and Guidelines](#gudelines) + 0. [DEMO](#demo) + 1. [DEMO A) CVD risk factors, genes, and associated information identifications](#demo1) + 2. [DEMO B) All steps to build a literature embedding model (data collection ~ model training)](#demo2) + 3. [Reproduction DEMO](#default_demo) + 1. [Literature data collection](#collection) + 2. [Literature data preprocessing](#preprocessing) + 3. [Literature embedding model training](#training) + 4. [Literature embedding model evaluation](#evaluation) + 5. [FS and DR applications on cohort data](#applications) +3. [Results](#results) +4. [GitHub Source](#github) + +### 1. Introduction +Accurate prediction of cardiovascular disease (CVD) requires multifaceted information consisting of not only a patient’s medical history, but also genomic data, symptoms, lifestyle, and risk factors which are often not incorporated into a decision-making process as the data are vast, difficult to obtain, and require complex algorithms. **Estimating CVD risk factors is now a significant goal for more accurate CVD prediction and treatment**. +##### Previous work's limitation +CVD risk factors can be identified from phenotype variables, genetic arrays, text, and image data. Several approaches have been introduced to identify CVD risk factors that are categorized as: (1) cohort-based CVD risk factor identification, and (2) literature-based CVD risk factor identification and information management. Category (1) enables objective validation of the identified risk factors using CVD patient data, but the number of available features is limited, which may limit the identification of new CVD risk factors. Category (2) enables the management of significant risk factors using publicly-available literature data; however, most methods were not validated using CVD patient data. 
Hence, **it is critical to develop a novel method to collect information on the risk factors, associated symptoms, and mechanisms, but it needs to be objectively validated using CVD patients to be relevant for better clinical diagnosis and treatment management.** +##### Our proposed work +In our paper, **we proposed a literature embedding model that was trained using literature data freely accessible online.** Our model enables the retrieval of CVD risk factors, associated information, and genes independently from population-based data. Even though our literature model was trained using literature, our model enables selecting accurate CVD-related features from the population-based cohort data as FS and DR models, which leads to better CVD prediction. + +### 2. Code implementation and guidelines +This section provides descriptions for [0. Demo](#demo) and details for [1. Literature data collection](#collection), [2. Literature data preprocessing](#preprocessing), [3. Literature embedding model training](#training), [4. Literature embedding model evaluation](#evaluation), and [5. FS and DR applications on cohort data](#applications). +The DEMO subsection gives an overview of our code, and the other five subsections show the details of the code for each purpose. + +We prepared five main scripts, one for each goal: + 1) step1_data_collection.py, + 2) step2_data_preprocessing.py, + 3) step3_literature_embedding_training.py, + 4) step4_CVD_risk_factor_identification.py, + 5) Extrinsic_application_CVD_prediction.py. + +We feed different inputs to each main script for each purpose. Details are described below. + +#### 2.0. DEMO + +We prepared three DEMOs: + 1) **DEMO A**: It provides **CVD risk factors, genes, and associated information identifications** using a pre-trained literature model. 
+ 2) **DEMO B**: It provides all steps for **literature data collection**, **literature data preprocessing**, and **literature embedding model training and intrinsic evaluation (CVD risk factor identifications)** + 3) **Reproduction DEMO**: It shows DEMO A's results and provides **literature embedding model training and evaluation steps**. + +On the CodeOcean platform, DEMO A is the default. + +##### 2.0.1. DEMO A) CVD risk factors, genes, and associated information identifications +To run DEMO A, run the following command: +~~~~ {.sourceCode .shell} +./run.sh 'demo_a' +~~~~ + +The command imports our pre-trained literature embedding model at EMBEDDING_PATH='../data/old_model' and captures CVD risk factors and associated information for three queries ('stroke', 'atrial fibrillation', 'ventricular fibrillation'). +The input query-related risk factors, associated information, and gene names will be displayed and saved in STEP4_OUTPUT_PATH='../results/demo_a'. + +##### 2.0.2. DEMO B) All steps to build a literature embedding model (data collection ~ model training) +To run DEMO B, run the following command on **your local computer**: +~~~~ {.sourceCode .shell} +./run.sh 'demo_b' +~~~~ +DEMO B provides all steps for literature data collection & preprocessing, and literature embedding model training & evaluation for CVD risk factor identifications. As configured, DEMO B collects only a limited amount of literature data. To collect all data, set NUM_WORD_BASED_DATA=0 and NUM_GENE_BASED_DATA=0. 
+ +~~~~ {.sourceCode .shell} +./run.sh 'demo_b' + echo 'demo b -- ' + QUERY_WORD='zinc' ## you can define query word to collect literature data + NUM_WORD_BASED_DATA=500000 #if NUM_WORD_BASED_DATA=0, it collects all possible query-word-related literature + NUM_GENE_BASED_DATA=100 #if NUM_GENE_BASED_DATA=0, it collects all possible gene-related literature + BASE_PATH='../results/' + DATA_COLLECTION_PATH='../results/demo_b' + PREPROCESSEING_PATH='../results/demo_b' + EMBEDDING_NAME='pre_trained_demo' + EMBEDDING_PATH='../results/pre_trained_demo' + EPOCH=2 + STEP4_OUTPUT_PATH='../results/CVD_searches' + + python -u step1_data_collection.py $QUERY_WORD $NUM_WORD_BASED_DATA $NUM_GENE_BASED_DATA $DATA_COLLECTION_PATH + python -u step2_data_preprocessing.py $DATA_COLLECTION_PATH $PREPROCESSEING_PATH + python -u step3_literature_embedding_training.py $PREPROCESSEING_PATH $EPOCH $EMBEDDING_NAME + python -u step4_CVD_risk_factor_identification.py $EMBEDDING_NAME $STEP4_OUTPUT_PATH +~~~~ + +This DEMO B generates the collected literature data, the pre-processed literature data, and the trained literature embedding model at './results'. + +##### 2.0.3. Reproduction DEMO + +The reproduction DEMO is run with the following command: +~~~~ {.sourceCode .shell} +./run.sh + +or + +./run.sh 'demo_r' +~~~~ + +This reproduction DEMO shows 1) CVD risk factor identifications using our paper's pre-trained literature model and 2) all steps for a literature model training process and risk factor searches using the newly pre-trained model. +We prepared a collected literature data set at PREPROCESSEING_PATH='../data/old_preprocessed_data' for 2). + +#### 2.1. Literature data collection +This subsection explains details for step1_data_collection.py. 
The code receives four inputs: + +~~~~ {.sourceCode .shell} + QUERY_WORD='zinc' + NUM_WORD_BASED_DATA=0 + NUM_GENE_BASED_DATA=0 + DATA_COLLECTION_PATH='../results/$USER_DEFINED' + + python -u step1_data_collection.py $QUERY_WORD $NUM_WORD_BASED_DATA $NUM_GENE_BASED_DATA $DATA_COLLECTION_PATH +~~~~ + +In our manuscript, we collected 16k published literature from PubMed using search keywords consisting of a word (“heart”) and human gene names, then trained a literature embedding model using the collected abstracts. Table 1 below shows an example of abstracts collected by this code. + +*Table 1. An example of collected abstracts* +|
Document type
|
Keyword
|
Example
| +|:---|:---|:---| +|Keyword-based Literature from PubMed|Heart|Waist-to-hip ratio (WHR) is a strong predictor of mortality in patients with **heart** failure (HF). Left ventricular diastolic filling function has predictable maturational progression, with significant differences in the intraventricular pressure difference between infants from birth to 2 years. | +|Gene Name-based Literature from PubMed|HMGA1|**HMGA1** has been shown to regulate genes involved with systemic inflammatory processes. We hypothesized that **HMGA1** is important in the function of mesenchymal stromal cells, which are known to modulate inflammatory responses due to sepsis.| + +We can change the number of documents to collect: +~~~ +If NUM_WORD_BASED_DATA==0: + It collects all documents for $QUERY_WORD. +elif NUM_WORD_BASED_DATA==100000: + It collects 100,000 documents for $QUERY_WORD. + +If NUM_GENE_BASED_DATA==0: + It collects all possible gene-related documents. +elif NUM_GENE_BASED_DATA==10: + It collects documents for 10*NUM_GENE_BASED_DATA gene names. +~~~ + +#### 2.2. Literature data preprocessing +This subsection explains details for step2_data_preprocessing.py. The code receives two inputs: +~~~ + DATA_COLLECTION_PATH='../results/$USER_DIFINED' + PREPROCESSEING_PATH='../results/$USER_DIFINED' + + python -u step2_data_preprocessing.py $DATA_COLLECTION_PATH $PREPROCESSEING_PATH +~~~ + +*Table 2. An example of text preprocessing* +|
Document
|
Gene Name
| Sentence | +|:---|:---|:---| +| Original | HMGA1 | Mesenchymal stromal cells expressing a dominant-negative high mobility group A1 transgene exhibit improved function during sepsis. | +| Pre-processed | #HMGA1 | mesenchymal stromal cells expressing dominant-negative high mobility group a# transgene exhibit improved function sepsis | + + +#### 2.3. Literature embedding model training +This subsection explains details for step3_literature_embedding_training.py. The code receives three inputs: +~~~ + EMBEDDING_PATH='../results/$MODEL_PATH' + EPOCH=2 # setting the number of epochs for literature embedding model + + python -u step3_literature_embedding_training.py $PREPROCESSEING_PATH $EPOCH $EMBEDDING_PATH + ~~~ +EMBEDDING_PATH is the embedding model path and EPOCH is the number of epochs. EPOCH=10 is recommended. +Our literature embedding model trains literature representations by the following three steps. To train 'heart'-related literature, the model trains a basic skip-gram structure as shown in Fig. 1(a). To train gene-name-related literature, the model uses the structures in Fig. 1(b) and (c). + +Fig. 1. Skip-gram structure of Word2vec +| (a) step 1| (b) step 2| (c) step 3| +| :--- | :--- | :--- | +| ![image](read_me_images/model1_re.jpg)|![image](read_me_images/model2_re.jpg)|![image](read_me_images/model3_re.jpg)| +|Skip-gram structure to predict context words using a center word in the same document|Our proposed structure (1) to predict the captured document's word contexts with the gene name that was used as the search query |Our proposed structure (2) to predict gene-name-associated words in the captured document using the gene name| + +Users can set hyper-parameters in step3_literature_embedding_training.py: +~~~~ {step3_literature_embedding_training.py} + window_size = 2 # The number of contexts per center word for literature model training. Details are in our manuscript.
+ min_count = 5 # Words whose appearance frequency in the documents is lower than min_count = 5 will be excluded. + min_size = 2 # Words that have character size <= min_size = 3 will be excluded. + dimension = 128 # Embedding model's dimension + num_sampled = 16 # Negative sampling parameter + batch_size = 256 # Training batch size +~~~~ + +#### 2.4. Literature embedding model evaluation (CVD risk factor searches) +This subsection explains details for step4_CVD_risk_factor_identification.py. The code receives two inputs: + +~~~~ {.sourceCode .shell} + EMBEDDING_PATH='../results/$MODEL_PATH' + STEP4_OUTPUT_PATH='../results/$SEARCH_PATH' + python -u step4_CVD_risk_factor_identification.py $EMBEDDING_PATH $STEP4_OUTPUT_PATH +~~~~ + +Users can put their query in step4_CVD_risk_factor_identification.py like below: + +~~~~ {.sourceCode .python} +queries = ['stroke', 'atrial fibrillation', 'ventricular fibrillation'] #put your own query in [] +~~~~ + +#### 2.5. FS and DR applications on cohort data +This subsection explains details for Extrinsic_application_CVD_prediction.py. The code has three inputs in Extrinsic_application_CVD_prediction.py: + * data_path = '../data/Example' + * model_path = '../data/old_model' + * output_path = '../results/prediction/' + +Users are required to prepare cohort data, pre-trained embedding model paths, and output paths. After users run Extrinsic_application_CVD_prediction.py with the user's cohort data, users will get prediction results and labels after our FS and DR processes for each K-fold at output_path. Then evaluate CVD prediction performance using performance_metrics.metric(label, prediction_results) in lib/performance_metrics.py. All other ML methods (FS, DT, H2FS, PCA, and UMAP) are in the lib/ML_models.py file. + +##### Cohort data format ###### + +Users are required to prepare cohort data with variable names. 
To show the format of input data, we generated example data Example_X (variables per subject) and Example_y (CVD labels per subject) using lib/ExpCohort_Generator.py.
Details are in the lib/ExpCohort_Generator.py file. + +The **format** of the input cohort data (Example_X) should be like below: + +*Table 3. The data format example generated by ExpCohort_Generator.py (variable)* +| Subject | bca | nit | fhha | sbld | pulrate | +|-----|----------|----------|----------|----------|----------| +| 0 | 0.296735 | 0.292552 | 0.074269 | 0.886255 | 0.235104 | +| 1 | 0.699152 | 0.626459 | 0.917815 | 0.988134 | 0.167721 | +| 2 | 0.484408 | 0.327285 | 0.351393 | 0.946728 | 0.366808 | +| 3 | 0.970385 | 0.811354 | 0.068369 | 0.246754 | 0.198345 | +| .. | ... | ... | ... | ... | ... | +| N | 0.905146 | 0.855485 | 0.657306 | 0.385825 | 0.957396 | + +The **format** of the CVD label per subject (Example_y) should be like below: + +*Table 4. The cohort data format example generated by ExpCohort_Generator.py (label)* +|
Subject
|
CVD (Yes=1, no=0)
| +|:---|:---| +| 1 | 0 | +| 2 | 1 | +| 3 | 1 | +| ... | ... | +| N | 0 | + +### 3. Results + +In our manuscript, we used three queries ('stroke', 'atrial fibrillation', 'ventricular fibrillation') for CVD risk factor identifications. We analyzed whether or not the captured words and genes were correctly identified as risk factors and associated symptoms for the input query words. Our model accurately (average accuracy of >96%) captured associated risk factors, symptoms, and genes for a given input query word. Details are described in our published manuscript. + +We also used our embedding model for FS and DR tasks on cohort data for CVD prediction. Our FS and DR method provides better performance with the fastest computation time when compared with other popular FS and DR methods - Random Forest, Decision Tree, H2FS, UMAP, and PCA. + +Our model has the potential to facilitate easier collation of multifaceted information for better data mining of vast publicly available data so that efficient and accurate risk factors and symptoms can be identified, which helps make better-informed decisions for CVD prediction and treatment. + +### 4. GitHub Source +------------- + +This project is also hosted on GitHub ([link](https://github.com/JihyeMooon/CVD_literature_embedding)) and is actively developed. + +### Error note +In the literature data collection process, some errors can happen due to network connections.
+ +if you get errors at 25/33 point from 'collecting_doc_using_word_based_query' like below: +~~~ + 25 / 33 + Going to download records from 1250001 to 1260000 + Going to download records from 1260001 to 1270000 + + raise HTTPError(req.full_url, code, msg, hdrs, fp) + or IncompleteRead: IncompleteRead(20458171 bytes read) +~~~ +Then run collecting_doc_using_word_based_query agian, with 'w2d_starting_point = 25' + +If you have problems from 'collecting_doc_using_gene_based_query' like below: +~~~ + Example: if we get error at 5 / 2634 +~~~ +Then run collecting_doc_using_gene_based_query agian, with 'g2d_starting_point = 5' + + + + \ No newline at end of file diff --git a/code/gene_extraction.py b/code/gene_extraction.py new file mode 100644 index 0000000..c90b3b9 --- /dev/null +++ b/code/gene_extraction.py @@ -0,0 +1,134 @@ +from Bio import Entrez +from Bio import SeqIO +import time +from urllib.error import HTTPError +from http.client import IncompleteRead + +# Set your email address for Entrez +Entrez.email = "lrmercadod@gmail.com" +Entrez.api_key = "f095f0c0aad9480d90ee0b869acb43670d08" + +# Search for human genes in the Nucleotide database +handle = Entrez.esearch(db="gene", term="Homo sapiens[Organism]", retmax=10000000) +human_record = Entrez.read(handle) +handle.close() + +# Search for human ZIP11 gene +handle = Entrez.esearch(db="gene", term="ZIP11 AND Homo sapiens[Organism]", retmax=10000000) +human_zip11_record = Entrez.read(handle) +handle.close() + +# Search for mouse ZIP11 gene +handle = Entrez.esearch(db="gene", term="ZIP11 AND Mus musculus[Organism]", retmax=10000000) +mouse_zip11_record = Entrez.read(handle) +handle.close() + +# Get the list of gene IDs +human_gene_ids = human_record["IdList"] +human_zip11_ids = human_zip11_record["IdList"] +mouse_zip11_ids = mouse_zip11_record["IdList"] + +# Combine all gene IDs +gene_ids = human_gene_ids + human_zip11_ids + mouse_zip11_ids + +# Open the output files +symbol_file = open("query_symbol.txt", "a", 
encoding="utf-8") # Append mode +id_file = open("query_ids.txt", "a", encoding="utf-8") # Append mode +full_name_file = open("query_full_name.txt", "a", encoding="utf-8") # Append mode +snp_file = open("query_snps.txt", "a", encoding="utf-8") # Append mode +error_file = open("error_log.txt", "a", encoding="utf-8") # Append mode for error logging + +max_retries = 5 +retry_delay = 2 +batch_size = 500 +batch_delay = 2 + +# Load the last processed batch from the checkpoint file +checkpoint_file = "checkpoint.txt" +try: + with open(checkpoint_file, "r") as file: + last_processed_batch = int(file.read()) +except FileNotFoundError: + last_processed_batch = 0 + +# Iterate over the gene IDs in batches and fetch the gene information +for i in range(last_processed_batch * batch_size, len(gene_ids), batch_size): + batch_ids = gene_ids[i:i+batch_size] + + for gene_id in batch_ids: + retries = 0 + while retries < max_retries: + try: + handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml") + gene_record = Entrez.read(handle) + handle.close() + break + except (HTTPError, IncompleteRead) as e: + print(f"Error: {str(e)}. 
Retrying...") + retries += 1 + time.sleep(retry_delay) + else: + print(f"Failed to fetch gene information for gene ID: {gene_id}") + continue + + # Extract the relevant information + if "Entrezgene_gene" in gene_record[0] and "Gene-ref" in gene_record[0]["Entrezgene_gene"]: + gene_ref = gene_record[0]["Entrezgene_gene"]["Gene-ref"] + gene_symbol = gene_ref.get("Gene-ref_locus", "") + gene_full_name = gene_ref.get("Gene-ref_desc", "") + else: + gene_symbol = "" + gene_full_name = "" + + # Retrieve SNP information for the gene + retries = 0 + while retries < max_retries: + try: + handle = Entrez.elink(dbfrom="gene", db="snp", id=gene_id) + snp_record = Entrez.read(handle) + handle.close() + + if snp_record[0]["LinkSetDb"]: + snp_ids = [link["Id"] for link in snp_record[0]["LinkSetDb"][0]["Link"]] + for snp_id in snp_ids: + try: + snp_file.write(str(snp_id) + "\n") + except OSError as e: + error_file.write(f"Error writing SNP ID {snp_id} for gene ID {gene_id}: {str(e)}\n") + else: + try: + snp_file.write("N/A\n") + except OSError as e: + error_file.write(f"Error writing 'N/A' to snp_file for gene ID {gene_id}: {str(e)}\n") + break + except (IndexError, RuntimeError, IncompleteRead) as e: + print(f"Error retrieving SNP information for gene ID: {gene_id}. 
Retrying...") + retries += 1 + time.sleep(retry_delay) + else: + print(f"Failed to retrieve SNP information for gene ID: {gene_id}") + try: + snp_file.write("N/A\n") + except OSError as e: + error_file.write(f"Error writing 'N/A' to snp_file for gene ID {gene_id}: {str(e)}\n") + + # Write the information to the respective files + symbol_file.write(gene_symbol + "\n") + id_file.write(gene_id + "\n") + full_name_file.write(gene_full_name + "\n") + + # Update the checkpoint file with the last processed batch + with open(checkpoint_file, "w") as file: + file.write(str(i // batch_size)) + + print(f"Processed batch {i//batch_size + 1} of {len(gene_ids)//batch_size + 1}") + time.sleep(batch_delay) + +# Close the output files +symbol_file.close() +id_file.close() +full_name_file.close() +snp_file.close() +error_file.close() + +print("Gene extraction completed.") \ No newline at end of file diff --git a/code/lib/Bio/Affy/CelFile.py b/code/lib/Bio/Affy/CelFile.py new file mode 100644 index 0000000..ee95b0d --- /dev/null +++ b/code/lib/Bio/Affy/CelFile.py @@ -0,0 +1,502 @@ +# Copyright 2004 by Harry Zuzan. All rights reserved. +# Copyright 2016 by Adam Kurkiewicz. All rights reserved. +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. + +"""Reading information from Affymetrix CEL files version 3 and 4.""" + + +import struct + +try: + import numpy +except ImportError: + from Bio import MissingPythonDependencyError + + raise MissingPythonDependencyError( + "Install NumPy if you want to use Bio.Affy.CelFile" + ) from None + + +class ParserError(ValueError): + """Affymetrix parser error.""" + + def __init__(self, *args): + """Initialise class.""" + super().__init__(*args) + + +class Record: + """Stores the information in a cel file. 
+ + Example usage: + + >>> from Bio.Affy import CelFile + >>> with open("Affy/affy_v3_example.CEL") as handle: + ... c = CelFile.read(handle) + ... + >>> print(c.ncols, c.nrows) + 5 5 + >>> print(c.intensities) + [[ 234. 170. 22177. 164. 22104.] + [ 188. 188. 21871. 168. 21883.] + [ 188. 193. 21455. 198. 21300.] + [ 188. 182. 21438. 188. 20945.] + [ 193. 20370. 174. 20605. 168.]] + >>> print(c.stdevs) + [[ 24. 34.5 2669. 19.7 3661.2] + [ 29.8 29.8 2795.9 67.9 2792.4] + [ 29.8 88.7 2976.5 62. 2914.5] + [ 29.8 76.2 2759.5 49.2 2762. ] + [ 38.8 2611.8 26.6 2810.7 24.1]] + >>> print(c.npix) + [[25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25] + [25 25 25 25 25]] + + """ + + def __init__(self): + """Initialize the class.""" + self.version = None + self.GridCornerUL = None + self.GridCornerUR = None + self.GridCornerLR = None + self.GridCornerLL = None + self.DatHeader = None + self.Algorithm = None + self.AlgorithmParameters = None + self.NumberCells = None + self.intensities = None + self.stdevs = None + self.npix = None + self.nrows = None + self.ncols = None + self.nmask = None + self.mask = None + self.noutliers = None + self.outliers = None + self.modified = None + + +def read(handle, version=None): + """Read Affymetrix CEL file and return Record object. + + CEL files format versions 3 and 4 are supported. + Please specify the CEL file format as 3 or 4 if known for the version + argument. If the version number is not specified, the parser will attempt + to detect the version from the file contents. + + The Record object returned by this function stores the intensities from + the CEL file in record.intensities. + Currently, record.mask and record.outliers are not set in when parsing + version 4 CEL files. + + Example Usage: + + >>> from Bio.Affy import CelFile + >>> with open("Affy/affy_v3_example.CEL") as handle: + ... record = CelFile.read(handle) + ... 
+ >>> record.version == 3 + True + >>> print("%i by %i array" % record.intensities.shape) + 5 by 5 array + + >>> with open("Affy/affy_v4_example.CEL", "rb") as handle: + ... record = CelFile.read(handle, version=4) + ... + >>> record.version == 4 + True + >>> print("%i by %i array" % record.intensities.shape) + 5 by 5 array + + """ + try: + data = handle.read(0) + except AttributeError: + raise ValueError("handle should be a file handle") from None + data = handle.read(4) + if not data: + raise ValueError("Empty file.") + if data == b"[CEL": + raise ValueError("CEL file in version 3 format should be opened in text mode") + if data == "[CEL": + # Version 3 format. Continue to read the header here before passing + # control to _read_v3 to avoid having to seek to the beginning of + # the file. + data += next(handle) + if data.strip() != "[CEL]": + raise ValueError("Failed to parse Affy Version 3 CEL file.") + line = next(handle) + keyword, value = line.split("=", 1) + if keyword != "Version": + raise ValueError("Failed to parse Affy Version 3 CEL file.") + version = int(value) + if version != 3: + raise ValueError("Incorrect version number in Affy Version 3 CEL file.") + return _read_v3(handle) + try: + magicNumber = struct.unpack("Epqb|%?m|Eu+5`QKyHn(>S^EBENnk~M32$F`)gc4dv%^3RyI)1bQ860H_n zF0yT@!A5}b?9ObEEPM!p#V)dtfFM9FIqV_WLykcZEEYLzE(0RiLvo37OoAK+L6Gki z$?lddd3S*%YO(mL`0CZGS5@ziyw=r~mGCzq<;xojlJw71iT~07KEmbx7zL9IiOEc< z%7(m^7m#8o_a*0jRk9LmS$<8H=!YkXs=BTjnk-owQ(vIZXUYS~Q9hMEmYBwppGz!h zC0`_r6zVCKMm_yPG19D;Wmxue*~qXQ%YQBzS=Pl0xaX`q>t0j%pMZ8xwQId#6hOQ1 zf<{x8q`|(%mD`rdY-dHcol1?bo1R^B^a`)7>sKq4t#!-e_NIP)c0sS$Rm;_%S==Rz zG2L{SK3?3#{G`P}IXHFGc3sf9wdJkWoEdAwa+u|mw`Mu7@y5u!<9cSbsxQ{pZ*A#z zMc=B`_2;JJ>0V8*yOw^rFQf{D#1fyU z4fP92l?)A&QiI$rle-oV$3E6EkqyzVTf|hcEJtJ&-04c;a(hsC(hKQ{?8`qCs{)cr zqj%o1J=?6>;I&*eUAMS6s05l@->`U)E|qLNE|t)vx>mIks3u==A(l+3B(O`RzmUHE z_Vo`R-34FnqbK(I$kg!gHOE}HN~IuMDy`R8y-IMtRC-!BtI?BG 
zsl;mKQb`aEx+0d#R<(+@49JUOQ0BC)IqiB*D{>TEr}LV9j{t9lK4^!$JjtO@Cso{g2PkZ2Jbeqc~HU40eX zybAI(VzF4Hn_kwZV}eDF({nyV8jO)zqik^7@y?fvPPtZfhjakE_S_BeF6tLw!vnVE zVCiiuLVEZR!;OxOi*ZLLhjAYr9hsaA>l4(D`tW%1v7n_B`pBhA;bUX*j$MV}fIkH;@w4AG}T^s(XLebFzChUjB^MjM_QzqkiIq&hq%cn5Qng6H9h;b@eJ zOU1__^Rz6nz+8`6&-Tb56oC#=T*M?MCZ+_F`p8sqQrE{OCPs>*9Wof5Dqf(s3S9TEaF zqZ1<|5mOVz3Dhr*4AW?U#)~8S$iUgKH^Z&+@Zso1z2iTo-O+@{{|l{;!&B7Ki=-dv z6kn%P{0s$?eW?_dvVft4(UK6QDoRb1Nl~UmnHFV6lvz>cLSbp+7xn?H~v1wQO-HlPW45r!`21J2p^h)dEeNbKL0MZ+d4N?wxsL7MQiXb3Q0-Izxmhb4z+5_6f6#EJcr&q zPem6M1uBSk$9O{93k^?p@C>d{!=HwRrbE%iP#8 z(@k#aa8AhjS*$3`!>y`i!h3Nytg>C%66YwUqeo<5@y7HT*T>04TXSm9;R~Wl_Bc9c zf)bpY`nu(KMSZ@~`A~Pn0nu$&cWPcV6880wK#H8N=QjL6)AOwL4Np*m2gq>1%J2o- zj(MpjAQ;ZL^5WWzA<-w29pRZbuPP2!9EQXA|=<(LT z@x)@U=I;NyR})VAw{>kIA;lA!p4P|2%QZ0sUf#-S+GDciG1GxNJlE0AD`XaeLDghvE%BJ^s2Q_xfzd{|{%&$?i5zhhC-b zuGZ>RY-zNVds91*Jvz0dD3|TV<(M>a7xx{1(-f9%$K+e#Brdi2CJ$X-H&^U(teslXZEGU!b2upP zengBTV2v5y(-+K8z_4G>u0W-+g2Gh~LGyCmvqbzEWbc{PdIYNu-$&@Z@cUrQtFpSY}7wfzdxM`HG*oEIl2%FTqUfAELku?n zhFI!wHxi($9hcy8sJ^#FBd={<@wM#~E9}r%!%dBK``TKP|46iU0~;;dXG1 zVF7gkIwGI~prZon26Rk7a7LIapkA*Jt;dhURl#_NMD0XW>u1EnVNo6tqY^>1V$Ci;yRGjFM2 z;Im&+pz@8y@F1q(_}AAR7a9QpzmcR=%3{sPtm2GYe8F<~AkIRdi8D#+j=zI?sP_lj zsw-5vqzF3{AoOvdnQqy(gQUIUAYT&5S$>ZCQ+UuQwl!DWtF3Zhr^V@X@q8gybca!R5)Ot$+p&ow;Y&Y3hS67kBK@ffzg+QU z_}fFC>?de*FbTp+EH-lF$Av=C8z;zZv^L?L!eb?i1CuvABpk^>h0_os$At8rhFGb} zf^>`=B+asibXJgBw_I|Zka)|6WZj$QMyT#c(**@?B7t|!WMRbj55zpFCJE||R9i!j z^zeA`aJxU`|Jhme>p|YOP zrBtwDrG!tLT3&?6)n~T#oWBK3ei{XAm+;q+0ToUiUqmCMo(w&u4vD9b&m+_vMB!3! 
zD?!W!xp90SL5G-w10vGoqrl~3RFE~{7pWi<5+oTsbey9PsG>fp*^W4Mzr{}G$BBs8 zO0B7I3Uawl#aSu_sCXMipmEb#v4Z4MXpl7=qm8XVqqqgPO0{NsoMgvOQgI5!U|%3t zjO+%t!u{nMU2PaU5%~e+5(2fnYI2xvp$+&vIvP3GtXSR_T!m*i&vua7Fd3!gja29k zm_{}Pmh1))mW@KVCFZ!fLXnt}iJj@KHaq+_F-9Rh4$uvPze@#e2|j`1r?}kDQAmj- z)$u!JAo3ZSlys6y@Ee-S%k<`d_|lB@ zGX3~pMc@x#0yY6D0Jr$`7!m>SEOummg=tMCswYwZLzZePp1Q61@{=U4R7Ke#n(2Mo zGJD&+><%4@TtqDmYCmI{SE+eOJ^-sL!pZ{cE0#L|s}N!3cjT86|8LgyDxTo2JHmpG z0&Cmv^@K?aw8Eo-)lKq8m~SsUl-`cSN1w3Ab6%EG9^feTGV>Ik4-yh&vwyGEo{ZZ2 zMf)GX@9Eu9g2Dci-H+yxX=eQ_R{97#0eg>EWk@0c3!Xxnoje+P z2suB;BzB5=vA2i^*Yl1c9hGCJ(Q-z#e4kn%iyS+}P7@|c6;yRW_2h4$dREZ*#cNdm z@c^m=uTgz_A70-9)dXIbguI@MculyY9aPVC@cOQx`gRAe`%!&QP<{3q)h`dAdj2)4 z=l1X_3%L#os=th=zTe{Y*Db0td%uI%ZwRXA4&XJOtLmPPC`wGsbo)5WSl-XSRQU{6 zD<7@W&}GTVVrS5b0mj;QO5s$S-|Z9Z49=PAiurpdeZ&!>*9Fn*R0luBcCXexQQg06 z@gsJL==%Zs=GbsFXVv{xgmaP&(~Jeqh@fAL>4(uq6FU)OLv4xFVq>)`CaULCQ;*+2(p zX+HicUR&CNvov1@XW#uDI4kVotP8IjLhpejmd0Lx#j>{edONbpvfz0+OW#>~Y-r-%$Ugk!?wy7C+1qGJ#~@m7eljyZH$OXr))@R+ ze%ScNT_=1e;37s7--*-^rF16f###yOc0|g_hvV0%I4IWqTK+IdQ*x8s<6ZLmL_pk7 zuP-c3?^1yFwX7Qn)EesT`%BYC0_85ff@-Lr&M!{yQea^u05;SIs2d5Cjl{j#>0Nri zWhnP%gB-nZu(*kLDpn(hFE}iIVWalk;=9CEBjc7$=K>OF)-L7JqPl0VTaAR?ukVW2 zW)#jfvV@7Zj2CRy$j2~6SGyEZHL@Zgx*!_pg7ur>EipEc4+;z{LR6n zk-cT{G9E$zMwZ?#-oy(Q+cA3P>ecF;C0M?1GjG-C?P$4S*(eq1bM~gihzklejN|hn#}MHn z)LmM&D;^~Tz)Z**M&Jm*Y1|w>Z&|7@ui#azYjoen#}l=6P`L$?coVALxqWx`PN2aY64n=4*_bQsxZtKaXp3fM#gEqU_Zx6rP~?8Zo%I@@UPX{go155@bPxGWI=q)`#nNf%?!Zi9A8rpWXyeZxQ38 zOcNjCPHze5DaHGRXgq>dTs_DTq*EcEfRf?dl1f@&XOV*-h>~-ZC7`?ku#lyo+7bL{ z=Iy8S4kCU^^FQbb^^Ym`cl0?SbCNuLG`DWr4!-A;gB-6r^m&|TxgO=hy2I}#M0SeK zL6Dml-Y(0oaur9 literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Affy/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Affy/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..822500fe397204f39ac71a9de049c17228a35b68 GIT binary patch literal 226 zcmXwz!A=4}5JY=9C^7I4b94{D5{*P-3?MfN7sJI&GHGXep>=kbY!3+jk-x;%zu?5I 
ziIr3(?^0FMZubhJ?x16@6M)|a{uLR<(bA6L;0;{z3Tf;6c1Gyc7l*Z)Jt@&v zrNbLdS!zBk}`kTYC{a93P>*?vD`5g^s)Q>)6 literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/AlignInfo.py b/code/lib/Bio/Align/AlignInfo.py new file mode 100644 index 0000000..95a4e69 --- /dev/null +++ b/code/lib/Bio/Align/AlignInfo.py @@ -0,0 +1,594 @@ +# Copyright 2000 Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Extract information from alignment objects. + +In order to try and avoid huge alignment objects with tons of functions, +functions which return summary type information about alignments should +be put into classes in this module. +""" + + +import math +import sys + +from Bio.Seq import Seq + + +class SummaryInfo: + """Calculate summary info about the alignment. + + This class should be used to calculate information summarizing the + results of an alignment. This may either be straight consensus info + or more complicated things. + """ + + def __init__(self, alignment): + """Initialize with the alignment to calculate information on. + + ic_vector attribute. A list of ic content for each column number. + """ + self.alignment = alignment + self.ic_vector = [] + + def dumb_consensus(self, threshold=0.7, ambiguous="X", require_multiple=False): + """Output a fast consensus sequence of the alignment. + + This doesn't do anything fancy at all. It will just go through the + sequence residue by residue and count up the number of each type + of residue (ie. A or G or T or C for DNA) in all sequences in the + alignment. If the percentage of the most common residue type is + greater then the passed threshold, then we will add that residue type, + otherwise an ambiguous character will be added. + + This could be made a lot fancier (ie. 
to take a substitution matrix + into account), but it just meant for a quick and dirty consensus. + + Arguments: + - threshold - The threshold value that is required to add a particular + atom. + - ambiguous - The ambiguous character to be added when the threshold is + not reached. + - require_multiple - If set as True, this will require that more than + 1 sequence be part of an alignment to put it in the consensus (ie. + not just 1 sequence and gaps). + + """ + # Iddo Friedberg, 1-JUL-2004: changed ambiguous default to "X" + consensus = "" + + # find the length of the consensus we are creating + con_len = self.alignment.get_alignment_length() + + # go through each seq item + for n in range(con_len): + # keep track of the counts of the different atoms we get + atom_dict = {} + num_atoms = 0 + + for record in self.alignment: + # make sure we haven't run past the end of any sequences + # if they are of different lengths + if n < len(record.seq): + if record.seq[n] != "-" and record.seq[n] != ".": + if record.seq[n] not in atom_dict: + atom_dict[record.seq[n]] = 1 + else: + atom_dict[record.seq[n]] += 1 + + num_atoms = num_atoms + 1 + + max_atoms = [] + max_size = 0 + + for atom in atom_dict: + if atom_dict[atom] > max_size: + max_atoms = [atom] + max_size = atom_dict[atom] + elif atom_dict[atom] == max_size: + max_atoms.append(atom) + + if require_multiple and num_atoms == 1: + consensus += ambiguous + elif (len(max_atoms) == 1) and ( + (float(max_size) / float(num_atoms)) >= threshold + ): + consensus += max_atoms[0] + else: + consensus += ambiguous + + return Seq(consensus) + + def gap_consensus(self, threshold=0.7, ambiguous="X", require_multiple=False): + """Output a fast consensus sequence of the alignment, allowing gaps. + + Same as dumb_consensus(), but allows gap on the output. + + Things to do: + - Let the user define that with only one gap, the result + character in consensus is gap. + - Let the user select gap character, now + it takes the same as input. 
+ + """ + consensus = "" + + # find the length of the consensus we are creating + con_len = self.alignment.get_alignment_length() + + # go through each seq item + for n in range(con_len): + # keep track of the counts of the different atoms we get + atom_dict = {} + num_atoms = 0 + + for record in self.alignment: + # make sure we haven't run past the end of any sequences + # if they are of different lengths + if n < len(record.seq): + if record.seq[n] not in atom_dict: + atom_dict[record.seq[n]] = 1 + else: + atom_dict[record.seq[n]] += 1 + + num_atoms += 1 + + max_atoms = [] + max_size = 0 + + for atom in atom_dict: + if atom_dict[atom] > max_size: + max_atoms = [atom] + max_size = atom_dict[atom] + elif atom_dict[atom] == max_size: + max_atoms.append(atom) + + if require_multiple and num_atoms == 1: + consensus += ambiguous + elif (len(max_atoms) == 1) and ( + (float(max_size) / float(num_atoms)) >= threshold + ): + consensus += max_atoms[0] + else: + consensus += ambiguous + + return Seq(consensus) + + def replacement_dictionary(self, skip_chars=None, letters=None): + """Generate a replacement dictionary to plug into a substitution matrix. + + This should look at an alignment, and be able to generate the number + of substitutions of different residues for each other in the + aligned object. + + Will then return a dictionary with this information:: + + {('A', 'C') : 10, ('C', 'A') : 12, ('G', 'C') : 15 ....} + + This also treats weighted sequences. The following example shows how + we calculate the replacement dictionary. Given the following + multiple sequence alignment:: + + GTATC 0.5 + AT--C 0.8 + CTGTC 1.0 + + For the first column we have:: + + ('A', 'G') : 0.5 * 0.8 = 0.4 + ('C', 'G') : 0.5 * 1.0 = 0.5 + ('A', 'C') : 0.8 * 1.0 = 0.8 + + We then continue this for all of the columns in the alignment, summing + the information for each substitution in each column, until we end + up with the replacement dictionary. 
+ + Arguments: + - skip_chars - Not used; setting it to anything other than None + will raise a ValueError + - letters - An iterable (e.g. a string or list of characters to include. + """ + if skip_chars is not None: + raise ValueError( + "argument skip_chars has been deprecated; instead, please use 'letters' to specify the characters you want to include" + ) + rep_dict = {(letter1, letter2): 0 for letter1 in letters for letter2 in letters} + + # iterate through each record + for rec_num1 in range(len(self.alignment)): + # iterate through each record from one beyond the current record + # to the end of the list of records + for rec_num2 in range(rec_num1 + 1, len(self.alignment)): + # for each pair of records, compare the sequences and add + # the pertinent info to the dictionary + self._pair_replacement( + self.alignment[rec_num1].seq, + self.alignment[rec_num2].seq, + self.alignment[rec_num1].annotations.get("weight", 1.0), + self.alignment[rec_num2].annotations.get("weight", 1.0), + rep_dict, + letters, + ) + + return rep_dict + + def _pair_replacement(self, seq1, seq2, weight1, weight2, dictionary, letters): + """Compare two sequences and generate info on the replacements seen (PRIVATE). + + Arguments: + - seq1, seq2 - The two sequences to compare. + - weight1, weight2 - The relative weights of seq1 and seq2. + - dictionary - The dictionary containing the starting replacement + info that we will modify. + - letters - A list of characters to include when calculating replacements. 
+ + """ + # loop through each residue in the sequences + for residue1, residue2 in zip(seq1, seq2): + if residue1 in letters and residue2 in letters: + dictionary[(residue1, residue2)] += weight1 * weight2 + + def _get_all_letters(self): + """Return a string containing the expected letters in the alignment (PRIVATE).""" + set_letters = set() + for record in self.alignment: + set_letters.update(record.seq) + list_letters = sorted(set_letters) + all_letters = "".join(list_letters) + return all_letters + + def pos_specific_score_matrix(self, axis_seq=None, chars_to_ignore=None): + """Create a position specific score matrix object for the alignment. + + This creates a position specific score matrix (pssm) which is an + alternative method to look at a consensus sequence. + + Arguments: + - chars_to_ignore - A list of all characters not to include in + the pssm. + - axis_seq - An optional argument specifying the sequence to + put on the axis of the PSSM. This should be a Seq object. If nothing + is specified, the consensus sequence, calculated with default + parameters, will be used. + + Returns: + - A PSSM (position specific score matrix) object. 
+ + """ + # determine all of the letters we have to deal with + all_letters = self._get_all_letters() + assert all_letters + + if chars_to_ignore is None: + chars_to_ignore = [] + if not isinstance(chars_to_ignore, list): + raise TypeError("chars_to_ignore should be a list.") + + gap_char = "-" + chars_to_ignore.append(gap_char) + + for char in chars_to_ignore: + all_letters = all_letters.replace(char, "") + + if axis_seq: + left_seq = axis_seq + assert len(axis_seq) == self.alignment.get_alignment_length() + else: + left_seq = self.dumb_consensus() + + pssm_info = [] + # now start looping through all of the sequences and getting info + for residue_num in range(len(left_seq)): + score_dict = dict.fromkeys(all_letters, 0) + for record in self.alignment: + try: + this_residue = record.seq[residue_num] + # if we hit an index error we've run out of sequence and + # should not add new residues + except IndexError: + this_residue = None + + if this_residue and this_residue not in chars_to_ignore: + weight = record.annotations.get("weight", 1.0) + try: + score_dict[this_residue] += weight + except KeyError: + raise ValueError( + "Residue %s not found" % this_residue + ) from None + + pssm_info.append((left_seq[residue_num], score_dict)) + + return PSSM(pssm_info) + + def information_content( + self, + start=0, + end=None, + e_freq_table=None, + log_base=2, + chars_to_ignore=None, + pseudo_count=0, + ): + """Calculate the information content for each residue along an alignment. + + Arguments: + - start, end - The starting an ending points to calculate the + information content. These points should be relative to the first + sequence in the alignment, starting at zero (ie. even if the 'real' + first position in the seq is 203 in the initial sequence, for + the info content, we need to use zero). This defaults to the entire + length of the first sequence. + - e_freq_table - A dictionary specifying the expected frequencies + for each letter (e.g. 
{'G' : 0.4, 'C' : 0.4, 'T' : 0.1, 'A' : 0.1}). + Gap characters should not be included, since these should not have + expected frequencies. + - log_base - The base of the logarithm to use in calculating the + information content. This defaults to 2 so the info is in bits. + - chars_to_ignore - A listing of characters which should be ignored + in calculating the info content. Defaults to none. + + Returns: + - A number representing the info content for the specified region. + + Please see the Biopython manual for more information on how information + content is calculated. + + """ + # if no end was specified, then we default to the end of the sequence + if end is None: + end = len(self.alignment[0].seq) + if chars_to_ignore is None: + chars_to_ignore = [] + + if start < 0 or end > len(self.alignment[0].seq): + raise ValueError( + "Start (%s) and end (%s) are not in the range %s to %s" + % (start, end, 0, len(self.alignment[0].seq)) + ) + # determine random expected frequencies, if necessary + random_expected = None + # determine all of the letters we have to deal with + all_letters = self._get_all_letters() + for char in chars_to_ignore: + all_letters = all_letters.replace(char, "") + + info_content = {} + for residue_num in range(start, end): + freq_dict = self._get_letter_freqs( + residue_num, + self.alignment, + all_letters, + chars_to_ignore, + pseudo_count, + e_freq_table, + random_expected, + ) + # print(freq_dict, end="") + column_score = self._get_column_info_content( + freq_dict, e_freq_table, log_base, random_expected + ) + info_content[residue_num] = column_score + # sum up the score + total_info = sum(info_content.values()) + # fill in the ic_vector member: holds IC for each column + # reset ic_vector to empty list at each call + self.ic_vector = [] + for (i, k) in enumerate(info_content): + self.ic_vector.append(info_content[i + start]) + return total_info + + def _get_letter_freqs( + self, + residue_num, + all_records, + letters, + to_ignore, + 
pseudo_count=0, + e_freq_table=None, + random_expected=None, + ): + """Determine the frequency of specific letters in the alignment (PRIVATE). + + Arguments: + - residue_num - The number of the column we are getting frequencies + from. + - all_records - All of the SeqRecords in the alignment. + - letters - The letters we are interested in getting the frequency + for. + - to_ignore - Letters we are specifically supposed to ignore. + - pseudo_count - Optional argument specifying the Pseudo count (k) + to add in order to prevent a frequency of 0 for a letter. + - e_freq_table - An optional argument specifying a dictionary with + the expected frequencies for each letter. + - random_expected - Optional argument that specify the frequency to use + when e_freq_table is not defined. + + This will calculate the frequencies of each of the specified letters + in the alignment at the given frequency, and return this as a + dictionary where the keys are the letters and the values are the + frequencies. 
Pseudo count can be added to prevent a null frequency + """ + freq_info = dict.fromkeys(letters, 0) + + total_count = 0 + + gap_char = "-" + + if pseudo_count < 0: + raise ValueError( + "Positive value required for pseudo_count, %s provided" % (pseudo_count) + ) + + # collect the count info into the dictionary for all the records + for record in all_records: + try: + if record.seq[residue_num] not in to_ignore: + weight = record.annotations.get("weight", 1.0) + freq_info[record.seq[residue_num]] += weight + total_count += weight + except KeyError: + raise ValueError( + "Residue %s not found in letters %s" + % (record.seq[residue_num], letters) + ) from None + + if e_freq_table: + # check if all the residus in freq_info are in e_freq_table + for key in freq_info: + if key != gap_char and key not in e_freq_table: + raise ValueError("%s not found in expected frequency table" % key) + + if total_count == 0: + # This column must be entirely ignored characters + for letter in freq_info: + assert freq_info[letter] == 0 + # TODO - Map this to NA or NaN? + else: + # now convert the counts into frequencies + for letter in freq_info: + if pseudo_count and (random_expected or e_freq_table): + # use either the expected random freq or the + if e_freq_table: + ajust_freq = e_freq_table[letter] + else: + ajust_freq = random_expected + + ajusted_letter_count = freq_info[letter] + ajust_freq * pseudo_count + ajusted_total = total_count + pseudo_count + freq_info[letter] = ajusted_letter_count / ajusted_total + + else: + freq_info[letter] = freq_info[letter] / total_count + + return freq_info + + def _get_column_info_content( + self, obs_freq, e_freq_table, log_base, random_expected + ): + """Calculate the information content for a column (PRIVATE). + + Arguments: + - obs_freq - The frequencies observed for each letter in the column. + - e_freq_table - An optional argument specifying a dictionary with + the expected frequencies for each letter. 
+ - log_base - The base of the logarithm to use in calculating the + info content. + + """ + gap_char = "-" + + if e_freq_table: + # check the expected freq information to make sure it is good + for key in obs_freq: + if key != gap_char and key not in e_freq_table: + raise ValueError( + f"Frequency table provided does not contain observed letter {key}" + ) + + total_info = 0.0 + + for letter in obs_freq: + inner_log = 0.0 + # if we have expected frequencies, modify the log value by them + # gap characters do not have expected frequencies, so they + # should just be the observed frequency. + if letter != gap_char: + if e_freq_table: + inner_log = obs_freq[letter] / e_freq_table[letter] + else: + inner_log = obs_freq[letter] / random_expected + # if the observed frequency is zero, we don't add any info to the + # total information content + if inner_log > 0: + letter_info = ( + obs_freq[letter] * math.log(inner_log) / math.log(log_base) + ) + total_info += letter_info + return total_info + + def get_column(self, col): + """Return column of alignment.""" + # TODO - Deprecate this and implement slicing? + return self.alignment[:, col] + + +class PSSM: + """Represent a position specific score matrix. + + This class is meant to make it easy to access the info within a PSSM + and also make it easy to print out the information in a nice table. + + Let's say you had an alignment like this:: + + GTATC + AT--C + CTGTC + + The position specific score matrix (when printed) looks like:: + + G A T C + G 1 1 0 1 + T 0 0 3 0 + A 1 1 0 0 + T 0 0 2 0 + C 0 0 0 3 + + You can access a single element of the PSSM using the following:: + + your_pssm[sequence_number][residue_count_name] + + For instance, to get the 'T' residue for the second element in the + above alignment you would need to do: + + your_pssm[1]['T'] + """ + + def __init__(self, pssm): + """Initialize with pssm data to represent. 
+ + The pssm passed should be a list with the following structure: + + list[0] - The letter of the residue being represented (for instance, + from the example above, the first few list[0]s would be GTAT... + list[1] - A dictionary with the letter substitutions and counts. + """ + self.pssm = pssm + + def __getitem__(self, pos): + return self.pssm[pos][1] + + def __str__(self): + out = " " + all_residues = sorted(self.pssm[0][1]) + + # first print out the top header + for res in all_residues: + out += " %s" % res + out += "\n" + + # for each item, write out the substitutions + for item in self.pssm: + out += "%s " % item[0] + for res in all_residues: + out += " %.1f" % item[1][res] + + out += "\n" + return out + + def get_residue(self, pos): + """Return the residue letter at the specified position.""" + return self.pssm[pos][0] + + +def print_info_content(summary_info, fout=None, rep_record=0): + """3 column output: position, aa in representative sequence, ic_vector value.""" + fout = fout or sys.stdout + if not summary_info.ic_vector: + summary_info.information_content() + rep_sequence = summary_info.alignment[rep_record].seq + for pos, ic in enumerate(summary_info.ic_vector): + fout.write("%d %s %.3f\n" % (pos, rep_sequence[pos], ic)) diff --git a/code/lib/Bio/Align/Applications/_ClustalOmega.py b/code/lib/Bio/Align/Applications/_ClustalOmega.py new file mode 100644 index 0000000..2181bc5 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_ClustalOmega.py @@ -0,0 +1,269 @@ +# Copyright 2011 by Andreas Wilm. All rights reserved. +# Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. 
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for clustal omega.

    http://www.clustal.org/omega

    Notes
    -----
    Last checked against version: 1.2.0

    References
    ----------
    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 https://doi.org/10.1038/msb.2011.75

    Examples
    --------
    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v

    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Build the option table and hand it to AbstractCommandline.

        ``cmd`` is the executable name; ``kwargs`` are option values keyed by
        the last name in each option's name list.
        """
        sequence_types = ("protein", "rna", "dna", "Protein", "RNA", "DNA", "PROTEIN")
        # The same format names are accepted for --infmt and --outfmt.
        msa_formats = (
            "a2m",
            "fa",
            "fasta",
            "clu",
            "clustal",
            "msf",
            "phy",
            "phylip",
            "selex",
            "st",
            "stockholm",
            "vie",
            "vienna",
        )
        output_orders = ("input-order", "tree-order")

        def is_int(value):
            return isinstance(value, int)

        def is_msa_format(value):
            return value in msa_formats

        def file_option(names, description):
            # A file name argument passed as a separate word ("--opt value").
            return _Option(names, description, filename=True, equate=False)

        def int_option(names, description):
            # An integer argument passed as a separate word ("--opt value").
            return _Option(
                names, description, equate=False, checker_function=is_int
            )

        # NOTE(review): SOURCE's whitespace is collapsed, so the indentation
        # inside this description (originally one triple-quoted literal) is
        # reconstructed from upstream Biopython - confirm before merging.
        infmt_description = (
            "Forced sequence input file format (default: auto)\n"
            "\n"
            "                Allowed values: a2m, fa[sta], clu[stal], msf, "
            "phy[lip], selex, st[ockholm], vie[nna]\n"
            "                "
        )

        # order parameters in the same order as clustalo --help
        sequence_input = [
            file_option(
                ["-i", "--in", "--infile", "infile"], "Multiple sequence input file"
            ),
            file_option(["--hmm-in", "HMM input", "hmm_input"], "HMM input files"),
            _Switch(["--dealign", "dealign"], "Dealign input sequences"),
            file_option(
                ["--profile1", "--p1", "profile1"],
                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
            ),
            file_option(
                ["--profile2", "--p2", "profile2"],
                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
            ),
            _Option(
                ["-t", "--seqtype", "seqtype"],
                "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                equate=False,
                checker_function=lambda value: value in sequence_types,
            ),
            _Switch(
                ["--is-profile", "isprofile"],
                "disable check if profile, force profile (default no)",
            ),
            _Option(
                ["--infmt", "infmt"],
                infmt_description,
                equate=False,
                checker_function=is_msa_format,
            ),
        ]

        clustering = [
            file_option(
                ["--distmat-in", "distmat_in"],
                "Pairwise distance matrix input file (skips distance computation).",
            ),
            file_option(
                ["--distmat-out", "distmat_out"],
                "Pairwise distance matrix output file.",
            ),
            file_option(
                ["--guidetree-in", "guidetree_in"],
                "Guide tree input file (skips distance computation and guide-tree clustering step).",
            ),
            file_option(["--guidetree-out", "guidetree_out"], "Guide tree output file."),
            _Switch(
                ["--full", "distmat_full"],
                "Use full distance matrix for guide-tree calculation (slow; mBed is default)",
            ),
            _Switch(
                ["--full-iter", "distmat_full_iter"],
                "Use full distance matrix for guide-tree calculation during iteration (mBed is default)",
            ),
            _Option(
                ["--cluster-size", "clustersize"],
                "soft maximum of sequences in sub-clusters",
                checker_function=is_int,
            ),
            _Option(
                ["--clustering-out", "clusteringout"],
                "Clustering output file",
                filename=True,
            ),
            _Switch(
                ["--use-kimura", "usekimura"],
                "use Kimura distance correction for aligned sequences (default no)",
            ),
            _Switch(
                ["--percent-id", "percentid"],
                "convert distances into percent identities (default no)",
            ),
        ]

        alignment_output = [
            file_option(
                ["-o", "--out", "--outfile", "outfile"],
                "Multiple sequence alignment output file (default: stdout).",
            ),
            _Option(
                ["--outfmt", "outfmt"],
                "MSA output file format:"
                " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                " (default: fasta).",
                equate=False,
                checker_function=is_msa_format,
            ),
            _Switch(
                ["--residuenumber", "--resno", "residuenumber"],
                "in Clustal format print residue numbers (default no)",
            ),
            _Option(
                ["--wrap", "wrap"],
                "number of residues before line-wrap in output",
                checker_function=is_int,
            ),
            _Option(
                ["--output-order", "outputorder"],
                "MSA output order like in input/guide-tree",
                checker_function=lambda value: value in output_orders,
            ),
        ]

        iteration = [
            int_option(
                ["--iterations", "--iter", "iterations"],
                "Number of (combined guide-tree/HMM) iterations",
            ),
            int_option(
                ["--max-guidetree-iterations", "max_guidetree_iterations"],
                "Maximum number of guidetree iterations",
            ),
            int_option(
                ["--max-hmm-iterations", "max_hmm_iterations"],
                "Maximum number of HMM iterations",
            ),
        ]

        # Limits (will exit early, if exceeded)
        limits = [
            int_option(["--maxnumseq", "maxnumseq"], "Maximum allowed number of sequences"),
            int_option(["--maxseqlen", "maxseqlen"], "Maximum allowed sequence length"),
        ]

        miscellaneous = [
            _Switch(
                ["--auto", "auto"],
                "Set options automatically (might overwrite some of your options)",
            ),
            int_option(["--threads", "threads"], "Number of processors to use"),
            file_option(
                ["-l", "--log", "log"], "Log all non-essential output to this file."
            ),
            _Switch(["-h", "--help", "help"], "Print help and exit."),
            _Switch(["-v", "--verbose", "verbose"], "Verbose output"),
            _Switch(["--version", "version"], "Print version information and exit"),
            _Switch(
                ["--long-version", "long_version"],
                "Print long version information and exit",
            ),
            _Switch(["--force", "force"], "Force file overwriting."),
        ]

        self.parameters = (
            sequence_input
            + clustering
            + alignment_output
            + iteration
            + limits
            + miscellaneous
        )
        AbstractCommandline.__init__(self, cmd, **kwargs)
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Notes
    -----
    Last checked against versions: 1.83 and 2.1

    References
    ----------
    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Examples
    --------
    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Initialize the class.

        ``cmd`` is the executable name; ``kwargs`` are option values keyed by
        the lower-case option name (the last entry of each name list).

        Every parameter is registered under four spellings built from one
        key: ``-key``, ``-KEY``, ``KEY`` and ``key``.

        The guide-tree inputs (``usetree``, ``usetree1``, ``usetree2``) are
        checked with ``os.path.exists`` applied to the given value, like the
        weight-matrix file names. Their checkers used to return the
        ``os.path.exists`` function object itself, which is always true, so
        nothing was validated.
        """

        def names(key):
            # "-key", "-KEY", "KEY", "key" - the spellings used throughout.
            upper = key.upper()
            return ["-" + key, "-" + upper, upper, key]

        def option(key, description, **extra):
            return _Option(names(key), description, **extra)

        def switch(key, description):
            return _Switch(names(key), description)

        def is_int(value):
            return isinstance(value, int)

        def is_number(value):
            return isinstance(value, (int, float))

        def any_case(*words):
            # Accept each keyword fully upper-case or fully lower-case.
            choices = tuple(w.upper() for w in words) + tuple(
                w.lower() for w in words
            )
            return lambda value: value in choices

        def matrix_or_file(*builtin):
            # A built-in matrix name, or the path of an existing matrix file.
            is_builtin = any_case(*builtin)
            return lambda value: is_builtin(value) or os.path.exists(value)

        on_or_off = any_case("ON", "OFF")
        protein_matrix = matrix_or_file("BLOSUM", "PAM", "GONNET", "ID")
        dna_matrix = matrix_or_file("IUB", "CLUSTALW")

        self.parameters = [
            option("infile", "Input sequences.", filename=True),
            option("profile1", "Profiles (old alignment).", filename=True),
            option("profile2", "Profiles (old alignment).", filename=True),
            # ################# VERBS (do things) #############################
            switch("options", "List the command line parameters"),
            switch("help", "Outline the command line params."),
            switch("check", "Outline the command line params."),
            switch("fullhelp", "Output full help content."),
            switch("align", "Do full multiple alignment."),
            switch("tree", "Calculate NJ tree."),
            switch(
                "pim", "Output percent identity matrix (while calculating the tree)."
            ),
            option(
                "bootstrap",
                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                checker_function=is_int,
            ),
            switch("convert", "Output the input sequences in a different file format."),
            # #################### PARAMETERS (set things) #########################
            # ***General settings:****
            # "-interactive" is deliberately not wrapped: it makes no sense
            # when the tool is driven from Biopython.
            switch("quicktree", "Use FAST algorithm for the alignment guide tree"),
            option(
                "type",
                "PROTEIN or DNA sequences",
                checker_function=any_case("PROTEIN", "DNA"),
            ),
            switch("negative", "Protein alignment with negative values in matrix"),
            option("outfile", "Output sequence alignment file name", filename=True),
            option(
                "output",
                "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                checker_function=any_case(
                    "CLUSTAL", "GCG", "GDE", "PHYLIP", "PIR", "NEXUS", "FASTA"
                ),
            ),
            option(
                "outorder",
                "Output taxon order: INPUT or ALIGNED",
                checker_function=any_case("INPUT", "ALIGNED"),
            ),
            option(
                "case",
                "LOWER or UPPER (for GDE output only)",
                checker_function=any_case("UPPER", "LOWER"),
            ),
            option(
                "seqnos",
                "OFF or ON (for Clustal output only)",
                checker_function=on_or_off,
            ),
            option(
                "seqno_range",
                "OFF or ON (NEW- for all output formats)",
                checker_function=on_or_off,
            ),
            option(
                "range",
                "Sequence range to write starting m to m+n. "
                "Input as string eg. '24,200'",
            ),
            option(
                "maxseqlen",
                "Maximum allowed input sequence length",
                checker_function=is_int,
            ),
            switch("quiet", "Reduce console output to minimum"),
            option("stats", "Log some alignment statistics to file", filename=True),
            # ***Fast Pairwise Alignments:***
            option("ktuple", "Word size", checker_function=is_number),
            option("topdiags", "Number of best diags.", checker_function=is_number),
            option("window", "Window around best diags.", checker_function=is_number),
            option("pairgap", "Gap penalty", checker_function=is_number),
            option(
                "score",
                "Either: PERCENT or ABSOLUTE",
                checker_function=any_case("PERCENT", "ABSOLUTE"),
            ),
            # ***Slow Pairwise Alignments:***
            option(
                "pwmatrix",
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=protein_matrix,
                filename=True,
            ),
            option(
                "pwdnamatrix",
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=dna_matrix,
                filename=True,
            ),
            option("pwgapopen", "Gap opening penalty", checker_function=is_number),
            option("pwgapext", "Gap extension penalty", checker_function=is_number),
            # ***Multiple Alignments:***
            option(
                "newtree",
                "Output file name for newly created guide tree",
                filename=True,
            ),
            option(
                "usetree",
                "File name of guide tree",
                checker_function=os.path.exists,
                filename=True,
            ),
            option(
                "matrix",
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=protein_matrix,
                filename=True,
            ),
            option(
                "dnamatrix",
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=dna_matrix,
                filename=True,
            ),
            option("gapopen", "Gap opening penalty", checker_function=is_number),
            option("gapext", "Gap extension penalty", checker_function=is_number),
            switch("endgaps", "No end gap separation pen."),
            option("gapdist", "Gap separation pen. range", checker_function=is_number),
            switch("nopgap", "Residue-specific gaps off"),
            switch("nohgap", "Hydrophilic gaps off"),
            switch("hgapresidues", "List hydrophilic res."),
            option("maxdiv", "% ident. for delay", checker_function=is_number),
            # "-type" appears a second time under Multiple Alignments in the
            # tool's help; it is already registered in the general settings.
            option("transweight", "Transitions weighting", checker_function=is_number),
            option(
                "iteration",
                "NONE or TREE or ALIGNMENT",
                checker_function=any_case("NONE", "TREE", "ALIGNMENT"),
            ),
            option(
                "numiter",
                "maximum number of iterations to perform",
                checker_function=is_int,
            ),
            switch("noweights", "Disable sequence weighting"),
            # ***Profile Alignments:***
            switch("profile", "Merge two alignments by profile alignment"),
            option(
                "newtree1",
                "Output file name for new guide tree of profile1",
                filename=True,
            ),
            option(
                "newtree2",
                "Output file for new guide tree of profile2",
                filename=True,
            ),
            option(
                "usetree1",
                "File name of guide tree for profile1",
                checker_function=os.path.exists,
                filename=True,
            ),
            option(
                "usetree2",
                "File name of guide tree for profile2",
                checker_function=os.path.exists,
                filename=True,
            ),
            # ***Sequence to Profile Alignments:***
            switch(
                "sequences",
                "Sequentially add profile2 sequences to profile1 alignment",
            ),
            # "-newtree" and "-usetree" appear a second time here in the
            # tool's help; they are registered under Multiple Alignments.
            # ***Structure Alignments:***
            switch(
                "nosecstr1",
                "Do not use secondary structure-gap penalty mask for profile 1",
            ),
            switch(
                "nosecstr2",
                "Do not use secondary structure-gap penalty mask for profile 2",
            ),
            option(
                "secstrout",
                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                checker_function=any_case("STRUCTURE", "MASK", "BOTH", "NONE"),
            ),
            option(
                "helixgap",
                "Gap penalty for helix core residues",
                checker_function=is_number,
            ),
            option(
                "strandgap",
                "gap penalty for strand core residues",
                checker_function=is_number,
            ),
            option(
                "loopgap", "Gap penalty for loop regions", checker_function=is_number
            ),
            option(
                "terminalgap",
                "Gap penalty for structure termini",
                checker_function=is_number,
            ),
            option(
                "helixendin",
                "Number of residues inside helix to be treated as terminal",
                checker_function=is_int,
            ),
            option(
                "helixendout",
                "Number of residues outside helix to be treated as terminal",
                checker_function=is_int,
            ),
            option(
                "strandendin",
                "Number of residues inside strand to be treated as terminal",
                checker_function=is_int,
            ),
            option(
                "strandendout",
                "Number of residues outside strand to be treated as terminal",
                checker_function=is_int,
            ),
            # ***Trees:***
            option(
                "outputtree",
                "nj OR phylip OR dist OR nexus",
                checker_function=any_case("NJ", "PHYLIP", "DIST", "NEXUS"),
            ),
            option("seed", "Seed number for bootstraps.", checker_function=is_int),
            switch("kimura", "Use Kimura's correction."),
            switch("tossgaps", "Ignore positions with gaps."),
            option(
                "bootlabels",
                "Node OR branch position of bootstrap values in tree display",
                checker_function=any_case("NODE", "BRANCH"),
            ),
            option(
                "clustering",
                "NJ or UPGMA",
                checker_function=any_case("NJ", "UPGMA"),
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
+ + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output files names + aligned.* including a FASTA output file (aligned.fa), use: + + >>> from Bio.Align.Applications import DialignCommandline + >>> dialign_cline = DialignCommandline(input="unaligned.fasta", + ... fn="aligned", fa=True) + >>> print(dialign_cline) + dialign2-2 -fa -fn aligned unaligned.fasta + + You would typically run the command line with dialign_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="dialign2-2", **kwargs): + """Initialize the class.""" + self.program_name = cmd + self.parameters = [ + _Switch( + ["-afc", "afc"], + r"Creates additional output file '\*.afc' " + "containing data of all fragments considered " + "for alignment WARNING: this file can be HUGE !", + ), + _Switch( + ["-afc_v", "afc_v"], + "Like '-afc' but verbose: fragments are explicitly " + "printed. WARNING: this file can be EVEN BIGGER !", + ), + _Switch( + ["-anc", "anc"], + "Anchored alignment. Requires a file .anc " + "containing anchor points.", + ), + _Switch( + ["-cs", "cs"], + "If segments are translated, not only the 'Watson " + "strand' but also the 'Crick strand' is looked at.", + ), + _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."), + _Switch( + ["-ds", "ds"], + "'dna alignment speed up' - non-translated nucleic acid " + "fragments are taken into account only if they start " + "with at least two matches. 
Speeds up DNA alignment at " + "the expense of sensitivity.", + ), + _Switch(["-fa", "fa"], "Additional output file in FASTA format."), + _Switch( + ["-ff", "ff"], + r"Creates file \*.frg containing information about all " + "fragments that are part of the respective optimal " + "pairwise alignmnets plus information about " + "consistency in the multiple alignment", + ), + _Option( + ["-fn", "fn"], + "Output files are named ..", + equate=False, + ), + _Switch( + ["-fop", "fop"], + r"Creates file \*.fop containing coordinates of all " + "fragments that are part of the respective pairwise alignments.", + ), + _Switch( + ["-fsm", "fsm"], + r"Creates file \*.fsm containing coordinates of all " + "fragments that are part of the final alignment", + ), + _Switch( + ["-iw", "iw"], + "Overlap weights switched off (by default, overlap " + "weights are used if up to 35 sequences are aligned). " + "This option speeds up the alignment but may lead " + "to reduced alignment quality.", + ), + _Switch( + ["-lgs", "lgs"], + "'long genomic sequences' - combines the following " + "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, " + "-fop, -ff, -cs, -ds, -pst ", + ), + _Switch( + ["-lgs_t", "lgs_t"], + "Like '-lgs' but with all segment pairs assessed " + "at the peptide level (rather than 'mixed alignments' " + "as with the '-lgs' option). Therefore faster than " + "-lgs but not very sensitive for non-coding regions.", + ), + _Option( + ["-lmax", "lmax"], + "Maximum fragment length = x (default: x = 40 or " + "x = 120 for 'translated' fragments). 
Shorter x " + "speeds up the program but may affect alignment quality.", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Switch( + ["-lo", "lo"], + r"(Long Output) Additional file \*.log with information " + "about fragments selected for pairwise alignment and " + "about consistency in multi-alignment procedure.", + ), + _Switch( + ["-ma", "ma"], + "'mixed alignments' consisting of P-fragments and " + "N-fragments if nucleic acid sequences are aligned.", + ), + _Switch( + ["-mask", "mask"], + "Residues not belonging to selected fragments are " + r"replaced by '\*' characters in output alignment " + "(rather than being printed in lower-case characters)", + ), + _Switch( + ["-mat", "mat"], + r"Creates file \*mat with substitution counts derived " + "from the fragments that have been selected for alignment.", + ), + _Switch( + ["-mat_thr", "mat_thr"], + "Like '-mat' but only fragments with weight score " + "> t are considered", + ), + _Switch( + ["-max_link", "max_link"], + "'maximum linkage' clustering used to construct " + "sequence tree (instead of UPGMA).", + ), + _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."), + _Option(["-mot", "mot"], "'motif' option.", equate=False), + _Switch(["-msf", "msf"], "Separate output file in MSF format."), + _Switch( + ["-n", "n"], + "Input sequences are nucleic acid sequences. " + "No translation of fragments.", + ), + _Switch( + ["-nt", "nt"], + "Input sequences are nucleic acid sequences and " + "'nucleic acid segments' are translated to 'peptide " + "segments'.", + ), + _Switch( + ["-nta", "nta"], + "'no textual alignment' - textual alignment suppressed. " + "This option makes sense if other output files are of " + "interest -- e.g. 
the fragment files created with -ff, " + "-fop, -fsm or -lo.", + ), + _Switch( + ["-o", "o"], + "Fast version, resulting alignments may be slightly different.", + ), + _Switch( + ["-ow", "ow"], + "Overlap weights enforced (By default, overlap weights " + "are used only if up to 35 sequences are aligned since " + "calculating overlap weights is time consuming).", + ), + _Switch( + ["-pst", "pst"], + r"'print status'. Creates and updates a file \*.sta with " + "information about the current status of the program " + "run. This option is recommended if large data sets " + "are aligned since it allows the user to estimate the " + "remaining running time.", + ), + _Switch( + ["-smin", "smin"], + "Minimum similarity value for first residue pair " + "(or codon pair) in fragments. Speeds up protein " + "alignment or alignment of translated DNA fragments " + "at the expense of sensitivity.", + ), + _Option( + ["-stars", "stars"], + r"Maximum number of '\*' characters indicating degree " + "of local similarity among sequences. By default, no " + "stars are used but numbers between 0 and 9, instead.", + checker_function=lambda x: x in range(0, 10), + equate=False, + ), + _Switch(["-stdo", "stdo"], "Results written to standard output."), + _Switch( + ["-ta", "ta"], + "Standard textual alignment printed (overrides " + "suppression of textual alignments in special " + "options, e.g. -lgs)", + ), + _Option( + ["-thr", "thr"], + "Threshold T = x.", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Switch( + ["-xfr", "xfr"], + "'exclude fragments' - list of fragments can be " + "specified that are NOT considered for pairwise alignment", + ), + _Argument( + ["input"], + "Input file name. 
Must be FASTA format", + filename=True, + is_required=True, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_MSAProbs.py b/code/lib/Bio/Align/Applications/_MSAProbs.py new file mode 100644 index 0000000..74b26a1 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_MSAProbs.py @@ -0,0 +1,89 @@ +# Copyright 2013 by Christian Brueffer. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple sequence alignment program MSAProbs.""" + +from Bio.Application import _Argument, _Option, _Switch, AbstractCommandline + + +class MSAProbsCommandline(AbstractCommandline): + """Command line wrapper for MSAProbs. + + http://msaprobs.sourceforge.net + + Notes + ----- + Last checked against version: 0.9.7 + + References + ---------- + Yongchao Liu, Bertil Schmidt, Douglas L. Maskell: "MSAProbs: multiple + sequence alignment based on pair hidden Markov models and partition + function posterior probabilities". Bioinformatics, 2010, 26(16): 1958-1964 + + Examples + -------- + >>> from Bio.Align.Applications import MSAProbsCommandline + >>> in_file = "unaligned.fasta" + >>> out_file = "aligned.cla" + >>> cline = MSAProbsCommandline(infile=in_file, outfile=out_file, clustalw=True) + >>> print(cline) + msaprobs -o aligned.cla -clustalw unaligned.fasta + + You would typically run the command line with cline() or via + the Python subprocess module, as described in the Biopython tutorial. 
+ + """ + + def __init__(self, cmd="msaprobs", **kwargs): + """Initialize the class.""" + # order of parameters is the same as in msaprobs -help + self.parameters = [ + _Option( + ["-o", "--outfile", "outfile"], + "specify the output file name (STDOUT by default)", + filename=True, + equate=False, + ), + _Option( + ["-num_threads", "numthreads"], + "specify the number of threads used, and otherwise detect automatically", + checker_function=lambda x: isinstance(x, int), + ), + _Switch( + ["-clustalw", "clustalw"], + "use CLUSTALW output format instead of FASTA format", + ), + _Option( + ["-c", "consistency"], + "use 0 <= REPS <= 5 (default: 2) passes of consistency transformation", + checker_function=lambda x: isinstance(x, int) and 0 <= x <= 5, + ), + _Option( + ["-ir", "--iterative-refinement", "iterative_refinement"], + "use 0 <= REPS <= 1000 (default: 10) passes of iterative-refinement", + checker_function=lambda x: isinstance(x, int) and 0 <= x <= 1000, + ), + _Switch(["-v", "verbose"], "report progress while aligning (default: off)"), + _Option( + ["-annot", "annot"], + "write annotation for multiple alignment to FILENAME", + filename=True, + ), + _Switch( + ["-a", "--alignment-order", "alignment_order"], + "print sequences in alignment order rather than input order (default: off)", + ), + _Option(["-version", "version"], "print out version of MSAPROBS"), + _Argument(["infile"], "Multiple sequence input file", filename=True), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Mafft.py b/code/lib/Bio/Align/Applications/_Mafft.py new file mode 100644 index 0000000..4a0b901 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Mafft.py @@ -0,0 +1,435 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. 
+# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment programme MAFFT.""" + + +from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline + + +class MafftCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program MAFFT. + + http://align.bmr.kyushu-u.ac.jp/mafft/software/ + + Notes + ----- + Last checked against version: MAFFT v6.717b (2009/12/03) + + References + ---------- + Katoh, Toh (BMC Bioinformatics 9:212, 2008) Improved accuracy of + multiple ncRNA alignment by incorporating structural information into + a MAFFT-based framework (describes RNA structural alignment methods) + + Katoh, Toh (Briefings in Bioinformatics 9:286-298, 2008) Recent + developments in the MAFFT multiple sequence alignment program + (outlines version 6) + + Katoh, Toh (Bioinformatics 23:372-374, 2007) Errata PartTree: an + algorithm to build an approximate tree from a large number of + unaligned sequences (describes the PartTree algorithm) + + Katoh, Kuma, Toh, Miyata (Nucleic Acids Res. 33:511-518, 2005) MAFFT + version 5: improvement in accuracy of multiple sequence alignment + (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i + strategies) + + Katoh, Misawa, Kuma, Miyata (Nucleic Acids Res. 
30:3059-3066, 2002) + + Examples + -------- + >>> from Bio.Align.Applications import MafftCommandline + >>> mafft_exe = "/opt/local/mafft" + >>> in_file = "../Doc/examples/opuntia.fasta" + >>> mafft_cline = MafftCommandline(mafft_exe, input=in_file) + >>> print(mafft_cline) + /opt/local/mafft ../Doc/examples/opuntia.fasta + + If the mafft binary is on the path (typically the case on a Unix style + operating system) then you don't need to supply the executable location: + + >>> from Bio.Align.Applications import MafftCommandline + >>> in_file = "../Doc/examples/opuntia.fasta" + >>> mafft_cline = MafftCommandline(input=in_file) + >>> print(mafft_cline) + mafft ../Doc/examples/opuntia.fasta + + You would typically run the command line with mafft_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + Note that MAFFT will write the alignment to stdout, which you may + want to save to a file and then parse, e.g.:: + + stdout, stderr = mafft_cline() + with open("aligned.fasta", "w") as handle: + handle.write(stdout) + from Bio import AlignIO + align = AlignIO.read("aligned.fasta", "fasta") + + Alternatively, to parse the output with AlignIO directly you can + use StringIO to turn the string into a handle:: + + stdout, stderr = mafft_cline() + from io import StringIO + from Bio import AlignIO + align = AlignIO.read(StringIO(stdout), "fasta") + + """ + + def __init__(self, cmd="mafft", **kwargs): + """Initialize the class.""" + BLOSUM_MATRICES = ["30", "45", "62", "80"] + self.parameters = [ + # **** Algorithm **** + # Automatically selects an appropriate strategy from L-INS-i, FFT-NS- + # i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2) + _Switch(["--auto", "auto"], "Automatically select strategy. Default off."), + # Distance is calculated based on the number of shared 6mers. Default: on + _Switch( + ["--6merpair", "6merpair", "sixmerpair"], + "Distance is calculated based on the number of shared " + "6mers. 
Default: on", + ), + # All pairwise alignments are computed with the Needleman-Wunsch + # algorithm. More accurate but slower than --6merpair. Suitable for a + # set of globally alignable sequences. Applicable to up to ~200 + # sequences. A combination with --maxiterate 1000 is recommended (G- + # INS-i). Default: off (6mer distance is used) + _Switch( + ["--globalpair", "globalpair"], + "All pairwise alignments are computed with the " + "Needleman-Wunsch algorithm. Default: off", + ), + # All pairwise alignments are computed with the Smith-Waterman + # algorithm. More accurate but slower than --6merpair. Suitable for a + # set of locally alignable sequences. Applicable to up to ~200 + # sequences. A combination with --maxiterate 1000 is recommended (L- + # INS-i). Default: off (6mer distance is used) + _Switch( + ["--localpair", "localpair"], + "All pairwise alignments are computed with the " + "Smith-Waterman algorithm. Default: off", + ), + # All pairwise alignments are computed with a local algorithm with + # the generalized affine gap cost (Altschul 1998). More accurate but + # slower than --6merpair. Suitable when large internal gaps are + # expected. Applicable to up to ~200 sequences. A combination with -- + # maxiterate 1000 is recommended (E-INS-i). Default: off (6mer + # distance is used) + _Switch( + ["--genafpair", "genafpair"], + "All pairwise alignments are computed with a local " + "algorithm with the generalized affine gap cost " + "(Altschul 1998). Default: off", + ), + # All pairwise alignments are computed with FASTA (Pearson and Lipman + # 1988). FASTA is required. Default: off (6mer distance is used) + _Switch( + ["--fastapair", "fastapair"], + "All pairwise alignments are computed with FASTA " + "(Pearson and Lipman 1988). Default: off", + ), + # Weighting factor for the consistency term calculated from pairwise + # alignments. Valid when either of --globalpair, --localpair, -- + # genafpair, --fastapair or --blastpair is selected. 
Default: 2.7 + _Option( + ["--weighti", "weighti"], + "Weighting factor for the consistency term calculated " + "from pairwise alignments. Default: 2.7", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Guide tree is built number times in the progressive stage. Valid + # with 6mer distance. Default: 2 + _Option( + ["--retree", "retree"], + "Guide tree is built number times in the progressive " + "stage. Valid with 6mer distance. Default: 2", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Number cycles of iterative refinement are performed. Default: 0 + _Option( + ["--maxiterate", "maxiterate"], + "Number cycles of iterative refinement are performed. Default: 0", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Number of threads to use. Default: 1 + _Option( + ["--thread", "thread"], + "Number of threads to use. Default: 1", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Use FFT approximation in group-to-group alignment. Default: on + _Switch( + ["--fft", "fft"], + "Use FFT approximation in group-to-group alignment. Default: on", + ), + # Do not use FFT approximation in group-to-group alignment. Default: + # off + _Switch( + ["--nofft", "nofft"], + "Do not use FFT approximation in group-to-group " + "alignment. Default: off", + ), + # Alignment score is not checked in the iterative refinement stage. + # Default: off (score is checked) + _Switch( + ["--noscore", "noscore"], + "Alignment score is not checked in the iterative " + "refinement stage. Default: off (score is checked)", + ), + # Use the Myers-Miller (1988) algorithm. Default: automatically + # turned on when the alignment length exceeds 10,000 (aa/nt). + _Switch( + ["--memsave", "memsave"], + "Use the Myers-Miller (1988) algorithm. 
Default: " + "automatically turned on when the alignment length " + "exceeds 10,000 (aa/nt).", + ), + # Use a fast tree-building method (PartTree, Katoh and Toh 2007) with + # the 6mer distance. Recommended for a large number (> ~10,000) of + # sequences are input. Default: off + _Switch( + ["--parttree", "parttree"], + "Use a fast tree-building method with the 6mer " + "distance. Default: off", + ), + # The PartTree algorithm is used with distances based on DP. Slightly + # more accurate and slower than --parttree. Recommended for a large + # number (> ~10,000) of sequences are input. Default: off + _Switch( + ["--dpparttree", "dpparttree"], + "The PartTree algorithm is used with distances " + "based on DP. Default: off", + ), + # The PartTree algorithm is used with distances based on FASTA. + # Slightly more accurate and slower than --parttree. Recommended for + # a large number (> ~10,000) of sequences are input. FASTA is + # required. Default: off + _Switch( + ["--fastaparttree", "fastaparttree"], + "The PartTree algorithm is used with distances based " + "on FASTA. Default: off", + ), + # The number of partitions in the PartTree algorithm. Default: 50 + _Option( + ["--partsize", "partsize"], + "The number of partitions in the PartTree algorithm. Default: 50", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Do not make alignment larger than number sequences. Valid only with + # the --*parttree options. Default: the number of input sequences + _Switch( + ["--groupsize", "groupsize"], + "Do not make alignment larger than number sequences. " + "Default: the number of input sequences", + ), + # Adjust direction according to the first sequence + # Mafft V6 beta function + _Switch( + ["--adjustdirection", "adjustdirection"], + "Adjust direction according to the first sequence. 
Default off.", + ), + # Adjust direction according to the first sequence + # for highly diverged data; very slow + # Mafft V6 beta function + _Switch( + ["--adjustdirectionaccurately", "adjustdirectionaccurately"], + "Adjust direction according to the first sequence," + "for highly diverged data; very slow" + "Default off.", + ), + # **** Parameter **** + # Gap opening penalty at group-to-group alignment. Default: 1.53 + _Option( + ["--op", "op"], + "Gap opening penalty at group-to-group alignment. Default: 1.53", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Offset value, which works like gap extension penalty, for group-to- + # group alignment. Default: 0.123 + _Option( + ["--ep", "ep"], + "Offset value, which works like gap extension penalty, " + "for group-to- group alignment. Default: 0.123", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap opening penalty at local pairwise alignment. Valid when the -- + # localpair or --genafpair option is selected. Default: -2.00 + _Option( + ["--lop", "lop"], + "Gap opening penalty at local pairwise alignment. Default: 0.123", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Offset value at local pairwise alignment. Valid when the -- + # localpair or --genafpair option is selected. Default: 0.1 + _Option( + ["--lep", "lep"], + "Offset value at local pairwise alignment. Default: 0.1", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap extension penalty at local pairwise alignment. Valid when the - + # -localpair or --genafpair option is selected. Default: -0.1 + _Option( + ["--lexp", "lexp"], + "Gap extension penalty at local pairwise alignment. Default: -0.1", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap opening penalty to skip the alignment. Valid when the -- + # genafpair option is selected. 
Default: -6.00 + _Option( + ["--LOP", "LOP"], + "Gap opening penalty to skip the alignment. Default: -6.00", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # Gap extension penalty to skip the alignment. Valid when the -- + # genafpair option is selected. Default: 0.00 + _Option( + ["--LEXP", "LEXP"], + "Gap extension penalty to skip the alignment. Default: 0.00", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # BLOSUM number matrix (Henikoff and Henikoff 1992) is used. + # number=30, 45, 62 or 80. Default: 62 + _Option( + ["--bl", "bl"], + "BLOSUM number matrix is used. Default: 62", + checker_function=lambda x: x in BLOSUM_MATRICES, + equate=False, + ), + # JTT PAM number (Jones et al. 1992) matrix is used. number>0. + # Default: BLOSUM62 + _Option( + ["--jtt", "jtt"], + "JTT PAM number (Jones et al. 1992) matrix is used. " + "number>0. Default: BLOSUM62", + equate=False, + ), + # Transmembrane PAM number (Jones et al. 1994) matrix is used. + # number>0. Default: BLOSUM62 + _Option( + ["--tm", "tm"], + "Transmembrane PAM number (Jones et al. 1994) " + "matrix is used. number>0. Default: BLOSUM62", + filename=True, # to ensure spaced inputs are quoted + equate=False, + ), + # Use a user-defined AA scoring matrix. The format of matrixfile is + # the same to that of BLAST. Ignored when nucleotide sequences are + # input. Default: BLOSUM62 + _Option( + ["--aamatrix", "aamatrix"], + "Use a user-defined AA scoring matrix. Default: BLOSUM62", + filename=True, # to ensure spaced inputs are quoted + equate=False, + ), + # Incorporate the AA/nuc composition information into the scoring + # matrix. 
Default: off + _Switch( + ["--fmodel", "fmodel"], + "Incorporate the AA/nuc composition information into " + "the scoring matrix (True) or not (False, default)", + ), + # **** Output **** + # Name length for CLUSTAL and PHYLIP format output + _Option( + ["--namelength", "namelength"], + """Name length in CLUSTAL and PHYLIP output. + + MAFFT v6.847 (2011) added --namelength for use with + the --clustalout option for CLUSTAL output. + + MAFFT v7.024 (2013) added support for this with the + --phylipout option for PHYLIP output (default 10). + """, + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # Output format: clustal format. Default: off (fasta format) + _Switch( + ["--clustalout", "clustalout"], + "Output format: clustal (True) or fasta (False, default)", + ), + # Output format: phylip format. + # Added in beta with v6.847, fixed in v6.850 (2011) + _Switch( + ["--phylipout", "phylipout"], + "Output format: phylip (True), or fasta (False, default)", + ), + # Output order: same as input. Default: on + _Switch( + ["--inputorder", "inputorder"], + "Output order: same as input (True, default) or alignment " + "based (False)", + ), + # Output order: aligned. Default: off (inputorder) + _Switch( + ["--reorder", "reorder"], + "Output order: aligned (True) or in input order (False, default)", + ), + # Guide tree is output to the input.tree file. Default: off + _Switch( + ["--treeout", "treeout"], + "Guide tree is output to the input.tree file (True) or " + "not (False, default)", + ), + # Do not report progress. Default: off + _Switch( + ["--quiet", "quiet"], + "Do not report progress (True) or not (False, default).", + ), + # **** Input **** + # Assume the sequences are nucleotide. Default: auto + _Switch( + ["--nuc", "nuc"], + "Assume the sequences are nucleotide (True/False). Default: auto", + ), + # Assume the sequences are amino acid. Default: auto + _Switch( + ["--amino", "amino"], + "Assume the sequences are amino acid (True/False). 
Default: auto", + ), + # MAFFT has multiple --seed commands where the unaligned input is + # aligned to the seed alignment. There can be multiple seeds in the + # form: "mafft --seed align1 --seed align2 [etc] input" + # Effectively for n number of seed alignments. + # TODO - Can we use class _ArgumentList here? + _Option( + ["--seed", "seed"], + "Seed alignments given in alignment_n (fasta format) " + "are aligned with sequences in input.", + filename=True, + equate=False, + ), + # The input (must be FASTA format) + _Argument(["input"], "Input file name", filename=True, is_required=True), + # mafft-profile takes a second alignment input as an argument: + # mafft-profile align1 align2 + _Argument( + ["input1"], + "Second input file name for the mafft-profile command", + filename=True, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Muscle.py b/code/lib/Bio/Align/Applications/_Muscle.py new file mode 100644 index 0000000..6a67e2a --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Muscle.py @@ -0,0 +1,685 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program MUSCLE.""" + + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class MuscleCommandline(AbstractCommandline): + r"""Command line wrapper for the multiple alignment program MUSCLE. + + http://www.drive5.com/muscle/ + + Notes + ----- + Last checked against version: 3.7, briefly against 3.8 + + References + ---------- + Edgar, Robert C. 
(2004), MUSCLE: multiple sequence alignment with high + accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97. + + Edgar, R.C. (2004) MUSCLE: a multiple sequence alignment method with + reduced time and space complexity. BMC Bioinformatics 5(1): 113. + + Examples + -------- + >>> from Bio.Align.Applications import MuscleCommandline + >>> muscle_exe = r"C:\Program Files\Alignments\muscle3.8.31_i86win32.exe" + >>> in_file = r"C:\My Documents\unaligned.fasta" + >>> out_file = r"C:\My Documents\aligned.fasta" + >>> muscle_cline = MuscleCommandline(muscle_exe, input=in_file, out=out_file) + >>> print(muscle_cline) + "C:\Program Files\Alignments\muscle3.8.31_i86win32.exe" -in "C:\My Documents\unaligned.fasta" -out "C:\My Documents\aligned.fasta" + + You would typically run the command line with muscle_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="muscle", **kwargs): + """Initialize the class.""" + CLUSTERING_ALGORITHMS = ["upgma", "upgmb", "neighborjoining"] + DISTANCE_MEASURES_ITER1 = [ + "kmer6_6", + "kmer20_3", + "kmer20_4", + "kbit20_3", + "kmer4_6", + ] + DISTANCE_MEASURES_ITER2 = DISTANCE_MEASURES_ITER1 + [ + "pctid_kimura", + "pctid_log", + ] + OBJECTIVE_SCORES = ["sp", "ps", "dp", "xp", "spf", "spm"] + TREE_ROOT_METHODS = ["pseudo", "midlongestspan", "minavgleafdist"] + + # The nucleotide arguments for the sequence type parameter in MUSCLE (-seqtype) + # were updated at some point in MUSCLE version 3.8. Prior to the update + # 'nucleo' was used for nucleotide. This has been updated to 'rna' and 'dna'. 'nucleo' kept for + # backwards compatibility with older MUSCLE versions. 
+ SEQUENCE_TYPES = ["protein", "rna", "dna", "nucleo", "auto"] + WEIGHTING_SCHEMES = [ + "none", + "clustalw", + "henikoff", + "henikoffpb", + "gsc", + "threeway", + ] + self.parameters = [ + # Can't use "in" as the final alias as this + # is a reserved word in python: + _Option( + ["-in", "in", "input"], "Input filename", filename=True, equate=False + ), + _Option(["-out", "out"], "Output filename", filename=True, equate=False), + _Switch( + ["-diags", "diags"], "Find diagonals (faster for similar sequences)" + ), + _Switch(["-profile", "profile"], "Perform a profile alignment"), + _Option( + ["-in1", "in1"], + "First input filename for profile alignment", + filename=True, + equate=False, + ), + _Option( + ["-in2", "in2"], + "Second input filename for a profile alignment", + filename=True, + equate=False, + ), + # anchorspacing Integer 32 Minimum spacing + # between anchor cols + _Option( + ["-anchorspacing", "anchorspacing"], + "Minimum spacing between anchor columns", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # center Floating point [1] Center parameter. + # Should be negative. + _Option( + ["-center", "center"], + "Center parameter - should be negative", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # cluster1 upgma upgmb Clustering method. + _Option( + ["-cluster1", "cluster1"], + "Clustering method used in iteration 1", + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False, + ), + # cluster2 upgmb cluster1 is used + # neighborjoining in iteration 1 and + # 2, cluster2 in + # later iterations. + _Option( + ["-cluster2", "cluster2"], + "Clustering method used in iteration 2", + checker_function=lambda x: x in CLUSTERING_ALGORITHMS, + equate=False, + ), + # diaglength Integer 24 Minimum length of + # diagonal. 
+ _Option( + ["-diaglength", "diaglength"], + "Minimum length of diagonal", + checker_function=lambda x: isinstance(x, int), + equate=True, + ), + # diagmargin Integer 5 Discard this many + # positions at ends + # of diagonal. + _Option( + ["-diagmargin", "diagmargin"], + "Discard this many positions at ends of diagonal", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # distance1 kmer6_6 Kmer6_6(amino) or Distance measure + # kmer20_3 Kmer4_6(nucleo) for iteration 1 + # kmer20_4 + # kbit20_3 + # kmer4_6 + _Option( + ["-distance1", "distance1"], + "Distance measure for iteration 1", + checker_function=lambda x: x in DISTANCE_MEASURES_ITER1, + equate=False, + ), + # distance2 kmer6_6 pctid_kimura Distance measure + # kmer20_3 for iterations + # kmer20_4 2, 3 ... + # kbit20_3 + # pctid_kimura + # pctid_log + _Option( + ["-distance2", "distance2"], + "Distance measure for iteration 2", + checker_function=lambda x: x in DISTANCE_MEASURES_ITER2, + equate=False, + ), + # gapextend Floating point [1] The gap extend score + _Option( + ["-gapextend", "gapextend"], + "Gap extension penalty", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # gapopen Floating point [1] The gap open score + # Must be negative. + _Option( + ["-gapopen", "gapopen"], + "Gap open score - negative number", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # hydro Integer 5 Window size for + # determining whether + # a region is + # hydrophobic. + _Option( + ["-hydro", "hydro"], + "Window size for hydrophobic region", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # hydrofactor Floating point 1.2 Multiplier for gap + # open/close + # penalties in + # hydrophobic regions + _Option( + ["-hydrofactor", "hydrofactor"], + "Multiplier for gap penalties in hydrophobic regions", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # log File name None. 
Log file name + # (delete existing + # file). + _Option(["-log", "log"], "Log file name", filename=True, equate=False), + # loga File name None. Log file name + # (append to existing + # file). + _Option( + ["-loga", "loga"], + "Log file name (append to existing file)", + filename=True, + equate=False, + ), + # matrix File name None. File name for + # substitution matrix + # in NCBI or WU-BLAST + # format. If you + # specify your own + # matrix, you should + # also specify: + # -gapopen + # -gapextend + # -center 0.0 + _Option( + ["-matrix", "matrix"], + "path to NCBI or WU-BLAST format protein substitution " + "matrix - also set -gapopen, -gapextend and -center", + filename=True, + equate=False, + ), + # diagbreak Integer 1 Maximum distance + # between two + # diagonals that + # allows them to + # merge into one + # diagonal. + _Option( + ["-diagbreak", "diagbreak"], + "Maximum distance between two diagonals that allows " + "them to merge into one diagonal", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Option( + ["-maxdiagbreak", "maxdiagbreak"], # deprecated 3.8 + "Deprecated in v3.8, use -diagbreak instead.", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # maxhours Floating point None. Maximum time to + # run in hours. The + # actual time may + # exceed requested + # limit by a few + # minutes. Decimals + # are allowed, so 1.5 + # means one hour and + # 30 minutes. + _Option( + ["-maxhours", "maxhours"], + "Maximum time to run in hours", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # maxiters Integer 1, 2 ... 16 Maximum number of + # iterations. + _Option( + ["-maxiters", "maxiters"], + "Maximum number of iterations", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # maxtrees Integer 1 Maximum number of + # new trees to build + # in iteration 2. 
+ _Option( + ["-maxtrees", "maxtrees"], + "Maximum number of trees to build in iteration 2", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # minbestcolscore Floating point [1] Minimum score a + # column must have to + # be an anchor. + _Option( + ["-minbestcolscore", "minbestcolscore"], + "Minimum score a column must have to be an anchor", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # minsmoothscore Floating point [1] Minimum smoothed + # score a column must + # have to be an + # anchor. + _Option( + ["-minsmoothscore", "minsmoothscore"], + "Minimum smoothed score a column must have to be an anchor", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # objscore sp spm Objective score + # ps used by tree + # dp dependent + # xp refinement. + # spf sp=sum-of-pairs + # spm score. (dimer + # approximation) + # spm=sp for < 100 + # seqs, otherwise spf + # dp=dynamic + # programming score. + # ps=average profile- + # sequence score. + # xp=cross profile + # score. + _Option( + ["-objscore", "objscore"], + "Objective score used by tree dependent refinement", + checker_function=lambda x: x in OBJECTIVE_SCORES, + equate=False, + ), + # refinewindow Integer 200 Length of window + # for -refinew. + _Option( + ["-refinewindow", "refinewindow"], + "Length of window for -refinew", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # root1 pseudo pseudo Method used to root + _Option( + ["-root1", "root1"], + "Method used to root tree in iteration 1", + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False, + ), + # root2 midlongestspan tree; root1 is + # minavgleafdist used in iteration 1 + # and 2, root2 in + # later iterations. + _Option( + ["-root2", "root2"], + "Method used to root tree in iteration 2", + checker_function=lambda x: x in TREE_ROOT_METHODS, + equate=False, + ), + # scorefile File name None File name where to + # write a score file. 
+ # This contains one + # line for each column + # in the alignment. + # The line contains + # the letters in the + # column followed by + # the average BLOSUM62 + # score over pairs of + # letters in the + # column. + _Option( + ["-scorefile", "scorefile"], + "Score file name, contains one line for each column" + " in the alignment with average BLOSUM62 score", + filename=True, + equate=False, + ), + # seqtype protein auto Sequence type. + # dna (MUSCLE version > 3.8) + # rna (MUSCLE version > 3.8) + # auto + # nucleo (only valid for MUSCLE versions < 3.8) + _Option( + ["-seqtype", "seqtype"], + "Sequence type", + checker_function=lambda x: x in SEQUENCE_TYPES, + equate=False, + ), + # smoothscoreceil Floating point [1] Maximum value of + # column score for + # smoothing purposes. + _Option( + ["-smoothscoreceil", "smoothscoreceil"], + "Maximum value of column score for smoothing", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # smoothwindow Integer 7 Window used for + # anchor column + # smoothing. + _Option( + ["-smoothwindow", "smoothwindow"], + "Window used for anchor column smoothing", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + # spscore File name Compute SP + # objective score of + # multiple alignment. + _Option( + ["-spscore", "spscore"], + "Compute SP objective score of multiple alignment", + filename=True, + equate=False, + ), + # SUEFF Floating point value 0.1 Constant used in + # between 0 and 1. UPGMB clustering. + # Determines the + # relative fraction + # of average linkage + # (SUEFF) vs. nearest + # neighbor linkage + # (1 SUEFF). 
+ _Option( + ["-sueff", "sueff"], + "Constant used in UPGMB clustering", + checker_function=lambda x: isinstance(x, float), + equate=False, + ), + # tree1 File name None Save tree + _Option( + ["-tree1", "tree1"], "Save Newick tree from iteration 1", equate=False + ), + # tree2 first or second + # iteration to given + # file in Newick + # (Phylip-compatible) + # format. + _Option( + ["-tree2", "tree2"], "Save Newick tree from iteration 2", equate=False + ), + # usetree File name None Use given tree as + # guide tree. Must be + # in Newick + # (Phylip-compatible) + # format. + _Option( + ["-usetree", "usetree"], + "Use given Newick tree as guide tree", + filename=True, + equate=False, + ), + # weight1 none clustalw Sequence weighting + _Option( + ["-weight1", "weight1"], + "Weighting scheme used in iteration 1", + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False, + ), + # weight2 henikoff scheme. + # henikoffpb weight1 is used in + # gsc iterations 1 and 2. + # clustalw weight2 is used for + # threeway tree-dependent + # refinement. + # none=all sequences + # have equal weight. + # henikoff=Henikoff & + # Henikoff weighting + # scheme. + # henikoffpb=Modified + # Henikoff scheme as + # used in PSI-BLAST. + # clustalw=CLUSTALW + # method. + # threeway=Gotoh + # three-way method. + _Option( + ["-weight2", "weight2"], + "Weighting scheme used in iteration 2", + checker_function=lambda x: x in WEIGHTING_SCHEMES, + equate=False, + ), + # ################### FORMATS #################################### + # Multiple formats can be specified on the command line + # If -msf appears it will be used regardless of other formats + # specified. If -clw appears (and not -msf), clustalw format will + # be used regardless of other formats specified. If both -clw and + # -clwstrict are specified -clwstrict will be used regardless of + # other formats specified. If -fasta is specified and not -msf, + # -clw, or clwstrict, fasta will be used.
If -fasta and -html are + # specified -fasta will be used. Only if -html is specified alone + # will html be used. I kid ye not. + # clw no Write output in CLUSTALW format + # (default is FASTA). + _Switch( + ["-clw", "clw"], + "Write output in CLUSTALW format (with a MUSCLE header)", + ), + # clwstrict no Write output in CLUSTALW format with + # the "CLUSTAL W (1.81)" header rather + # than the MUSCLE version. This is + # useful when a post-processing step is + # picky about the file header. + _Switch( + ["-clwstrict", "clwstrict"], + "Write output in CLUSTALW format with version 1.81 header", + ), + # fasta yes Write output in FASTA format. + # Alternatives include clw, + # clwstrict, msf and html. + _Switch(["-fasta", "fasta"], "Write output in FASTA format"), + # html no Write output in HTML format (default + # is FASTA). + _Switch(["-html", "html"], "Write output in HTML format"), + # msf no Write output in MSF format (default + # is FASTA). + _Switch(["-msf", "msf"], "Write output in MSF format"), + # Phylip interleaved - undocumented as of 3.7 + _Switch(["-phyi", "phyi"], "Write output in PHYLIP interleaved format"), + # Phylip sequential - undocumented as of 3.7 + _Switch(["-phys", "phys"], "Write output in PHYLIP sequential format"), + # ################# Additional specified output files ######### + _Option( + ["-phyiout", "phyiout"], + "Write PHYLIP interleaved output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-physout", "physout"], + "Write PHYLIP sequential format to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-htmlout", "htmlout"], + "Write HTML output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-clwout", "clwout"], + "Write CLUSTALW output (with MUSCLE header) to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-clwstrictout", "clwstrictout"], + "Write CLUSTALW output (with version 1.81 header) to " + "specified filename", + 
filename=True, + equate=False, + ), + _Option( + ["-msfout", "msfout"], + "Write MSF format output to specified filename", + filename=True, + equate=False, + ), + _Option( + ["-fastaout", "fastaout"], + "Write FASTA format output to specified filename", + filename=True, + equate=False, + ), + # ############# END FORMATS ################################### + # anchors yes Use anchor optimization in tree + # dependent refinement iterations. + _Switch( + ["-anchors", "anchors"], + "Use anchor optimisation in tree dependent refinement iterations", + ), + # noanchors no Disable anchor optimization. Default + # is anchors. + _Switch( + ["-noanchors", "noanchors"], + "Do not use anchor optimisation in tree dependent " + "refinement iterations", + ), + # brenner no Use Steven Brenner's method for + # computing the root alignment. + _Switch( + ["-brenner", "brenner"], "Use Steve Brenner's root alignment method" + ), + # cluster no Perform fast clustering of input + # sequences. Use the tree1 option to + # save the tree. + _Switch( + ["-cluster", "cluster"], + "Perform fast clustering of input sequences, " + "use -tree1 to save tree", + ), + # dimer no Use dimer approximation for the + # SP score (faster, less accurate). + _Switch( + ["-dimer", "dimer"], + "Use faster (slightly less accurate) dimer approximation " + "for the SP score", + ), + # group yes Group similar sequences together + # in the output. This is the default. + # See also stable. + _Switch(["-group", "group"], "Group similar sequences in output"), + # ############# log-expectation profile score #################### + # One of either -le, -sp, or -sv + # + # According to the doc, spn is default and the only option for + # nucleotides: this doesn't appear to be true. -le, -sp, and -sv + # can be used and produce numerically different logs + # (what is going on?) + # + # spn fails on proteins + # le maybe Use log-expectation profile score + # (VTML240). Alternatives are to use sp + # or sv.
This is the default for amino + # acid sequences. + _Switch(["-le", "le"], "Use log-expectation profile score (VTML240)"), + # sv no Use sum-of-pairs profile score + # (VTML240). Default is le. + _Switch(["-sv", "sv"], "Use sum-of-pairs profile score (VTML240)"), + # sp no Use sum-of-pairs protein profile + # score (PAM200). Default is le. + _Switch(["-sp", "sp"], "Use sum-of-pairs protein profile score (PAM200)"), + # spn maybe Use sum-of-pairs nucleotide profile + # score (BLASTZ parameters). This is + # the only option for nucleotides, + # and is therefore the default. + _Switch( + ["-spn", "spn"], "Use sum-of-pairs protein nucleotide profile score" + ), + # ########## END log-expectation profile score ################### + # quiet no Do not display progress messages. + _Switch(["-quiet", "quiet"], "Do not display progress messages"), + # refine no Input file is already aligned, skip + # first two iterations and begin tree + # dependent refinement. + _Switch(["-refine", "refine"], "Only do tree dependent refinement"), + # refinew no Refine an alignment by dividing it + # into non-overlapping windows and + # re-aligning each window. Typically + # used for whole-genome nucleotide + # alignments. + _Switch( + ["-refinew", "refinew"], + "Only do tree dependent refinement using sliding window approach", + ), + # core yes in muscle, Do not catch exceptions. + # no in muscled. + _Switch(["-core", "core"], "Do not catch exceptions"), + # nocore no in muscle, Catch exceptions and give an + # yes in muscled. error message if possible. + _Switch(["-nocore", "nocore"], "Catch exceptions"), + # stable no Preserve input order of sequences + # in output file. Default is to group + # sequences by similarity (group). + _Switch( + ["-stable", "stable"], + "Do not group similar sequences in output (not supported in v3.8)", + ), + # termgaps4 yes Use 4-way test for treatment of + # terminal gaps. + # (Cannot be disabled in this version). 
+ # + # termgapsfull no Terminal gaps penalized with + # full penalty. [1] Not fully + # supported in this version + # + # termgapshalf yes Terminal gaps penalized with + # half penalty. [1] Not fully + # supported in this version + # + # termgapshalflonger no Terminal gaps penalized with + # half penalty if gap relative + # to longer sequence, otherwise with + # full penalty. [1] Not fully + # supported in this version + # + # verbose no Write parameter settings and + # progress messages to log file. + _Switch(["-verbose", "verbose"], "Write parameter settings and progress"), + # version no Write version string to + # stdout and exit + _Switch(["-version", "version"], "Write version string to stdout and exit"), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Prank.py b/code/lib/Bio/Align/Applications/_Prank.py new file mode 100644 index 0000000..4d07c56 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Prank.py @@ -0,0 +1,236 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program PRANK.""" + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class PrankCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program PRANK. + + http://www.ebi.ac.uk/goldman-srv/prank/prank/ + + Notes + ----- + Last checked against version: 081202 + + References + ---------- + Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive + multiple alignment of sequences with insertions. Proceedings of + the National Academy of Sciences, 102: 10557--10562. + + Loytynoja, A. 
and Goldman, N. 2008. Phylogeny-aware gap placement + prevents errors in sequence alignment and evolutionary analysis. + Science, 320: 1632. + + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output in aligned + FASTA format with the output filename starting with "aligned" (you + can't pick the filename explicitly), no tree output and no XML output, + use: + + >>> from Bio.Align.Applications import PrankCommandline + >>> prank_cline = PrankCommandline(d="unaligned.fasta", + ... o="aligned", # prefix only! + ... f=8, # FASTA output + ... notree=True, noxml=True) + >>> print(prank_cline) + prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree + + You would typically run the command line with prank_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + def __init__(self, cmd="prank", **kwargs): + """Initialize the class.""" + OUTPUT_FORMAT_VALUES = list(range(1, 18)) + self.parameters = [ + # ################# input/output parameters: ################## + # -d=sequence_file + _Option(["-d", "d"], "Input filename", filename=True, is_required=True), + # -t=tree_file [default: no tree, generate approximate NJ tree] + _Option(["-t", "t"], "Input guide tree filename", filename=True), + # -tree="tree_string" [tree in newick format; in double quotes] + _Option(["-tree", "tree"], "Input guide tree as Newick string"), + # -m=model_file [default: HKY2/WAG] + _Option( + ["-m", "m"], "User-defined alignment model filename. Default: HKY2/WAG" + ), + # -o=output_file [default: 'output'] + _Option( + ["-o", "o"], + "Output filenames prefix. Default: 'output'\n " + "Will write: output.?.fas (depending on requested " + "format), output.?.xml and output.?.dnd", + filename=True, + ), + # -f=output_format [default: 8] + _Option( + ["-f", "f"], + "Output alignment format. Default: 8 FASTA\n" + "Option are:\n" + "1. IG/Stanford 8. Pearson/Fasta\n" + "2. GenBank/GB 11. Phylip3.2\n" + "3. NBRF 12. Phylip\n" + "4. EMBL 14. 
PIR/CODATA\n" + "6. DNAStrider 15. MSF\n" + "7. Fitch 17. PAUP/NEXUS", + checker_function=lambda x: x in OUTPUT_FORMAT_VALUES, + ), + _Switch( + ["-noxml", "noxml"], + "Do not output XML files (PRANK versions earlier than v.120626)", + ), + _Switch( + ["-notree", "notree"], + "Do not output dnd tree files (PRANK versions earlier than v.120626)", + ), + _Switch( + ["-showxml", "showxml"], "Output XML files (PRANK v.120626 and later)" + ), + _Switch( + ["-showtree", "showtree"], + "Output dnd tree files (PRANK v.120626 and later)", + ), + _Switch(["-shortnames", "shortnames"], "Truncate names at first space"), + _Switch(["-quiet", "quiet"], "Reduce verbosity"), + # ###################### model parameters: ###################### + # +F [force insertions to be always skipped] + # -F [equivalent] + _Switch( + ["-F", "+F", "F"], "Force insertions to be always skipped: same as +F" + ), + # -dots [show insertion gaps as dots] + _Switch(["-dots", "dots"], "Show insertion gaps as dots"), + # -gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025] + _Option( + ["-gaprate", "gaprate"], + "Gap opening rate. Default: dna 0.025 prot 0.0025", + checker_function=lambda x: isinstance(x, float), + ), + # -gapext=# [gap extension probability; default: dna 0.5 / prot 0.5] + _Option( + ["-gapext", "gapext"], + "Gap extension probability. Default: dna 0.5 / prot 0.5", + checker_function=lambda x: isinstance(x, float), + ), + # -dnafreqs=#,#,#,# [ACGT; default: empirical] + _Option( + ["-dnafreqs", "dnafreqs"], + "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote " + "surrounded string value. Default: empirical", + checker_function=lambda x: isinstance(x, bytes), + ), + # -kappa=# [ts/tv rate ratio; default:2] + _Option( + ["-kappa", "kappa"], + "Transition/transversion ratio. Default: 2", + checker_function=lambda x: isinstance(x, int), + ), + # -rho=# [pur/pyr rate ratio; default:1] + _Option( + ["-rho", "rho"], + "Purine/pyrimidine ratio. 
Default: 1", + checker_function=lambda x: isinstance(x, int), + ), + # -codon [for DNA: use empirical codon model] + _Switch(["-codon", "codon"], "Codon aware alignment or not"), + # -termgap [penalise terminal gaps normally] + _Switch(["-termgap", "termgap"], "Penalise terminal gaps normally"), + # ############### other parameters: ################################ + # -nopost [do not compute posterior support; default: compute] + _Switch( + ["-nopost", "nopost"], + "Do not compute posterior support. Default: compute", + ), + # -pwdist=# [expected pairwise distance for computing guidetree; + # default: dna 0.25 / prot 0.5] + _Option( + ["-pwdist", "pwdist"], + "Expected pairwise distance for computing guidetree. " + "Default: dna 0.25 / prot 0.5", + checker_function=lambda x: isinstance(x, float), + ), + _Switch( + ["-once", "once"], "Run only once. Default: twice if no guidetree given" + ), + _Switch(["-twice", "twice"], "Always run twice"), + _Switch(["-skipins", "skipins"], "Skip insertions in posterior support"), + _Switch( + ["-uselogs", "uselogs"], + "Slower but should work for a greater number of sequences", + ), + _Switch(["-writeanc", "writeanc"], "Output ancestral sequences"), + _Switch( + ["-printnodes", "printnodes"], "Output each node; mostly for debugging" + ), + # -matresize=# [matrix resizing multiplier] + # Doesn't specify type but Float and Int work + _Option( + ["-matresize", "matresize"], + "Matrix resizing multiplier", + checker_function=lambda x: (isinstance(x, float) or isinstance(x, int)), + ), + # -matinitsize=# [matrix initial size multiplier] + # Doesn't specify type but Float and Int work + _Option( + ["-matinitsize", "matinitsize"], + "Matrix initial size multiplier", + checker_function=lambda x: (isinstance(x, float) or isinstance(x, int)), + ), + _Switch(["-longseq", "longseq"], "Save space in pairwise alignments"), + _Switch(["-pwgenomic", "pwgenomic"], "Do pairwise alignment, no guidetree"), + # -pwgenomicdist=# [distance for 
pairwise alignment; default: 0.3] + _Option( + ["-pwgenomicdist", "pwgenomicdist"], + "Distance for pairwise alignment. Default: 0.3", + checker_function=lambda x: isinstance(x, float), + ), + # -scalebranches=# [scale branch lengths; default: dna 1 / prot 2] + _Option( + ["-scalebranches", "scalebranches"], + "Scale branch lengths. Default: dna 1 / prot 2", + checker_function=lambda x: isinstance(x, int), + ), + # -fixedbranches=# [use fixed branch lengths] + # Assume looking for a float + _Option( + ["-fixedbranches", "fixedbranches"], + "Use fixed branch lengths of input value", + checker_function=lambda x: isinstance(x, float), + ), + # -maxbranches=# [set maximum branch length] + # Assume looking for a float + _Option( + ["-maxbranches", "maxbranches"], + "Use maximum branch lengths of input value", + checker_function=lambda x: isinstance(x, float), + ), + # -realbranches [disable branch length truncation] + _Switch( + ["-realbranches", "realbranches"], "Disable branch length truncation" + ), + _Switch(["-translate", "translate"], "Translate to protein"), + _Switch( + ["-mttranslate", "mttranslate"], "Translate to protein using mt table" + ), + # ##################### other: #################### + _Switch( + ["-convert", "convert"], + "Convert input alignment to new format. Do not perform alignment", + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_Probcons.py b/code/lib/Bio/Align/Applications/_Probcons.py new file mode 100644 index 0000000..e94e026 --- /dev/null +++ b/code/lib/Bio/Align/Applications/_Probcons.py @@ -0,0 +1,137 @@ +# Copyright 2009 by Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 
+# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program PROBCONS.""" + +from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline + + +class ProbconsCommandline(AbstractCommandline): + """Command line wrapper for the multiple alignment program PROBCONS. + + http://probcons.stanford.edu/ + + Notes + ----- + Last checked against version: 1.12 + + References + ---------- + Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005. + PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment. + Genome Research 15: 330-340. + + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output in ClustalW + format, and otherwise default settings, use: + + >>> from Bio.Align.Applications import ProbconsCommandline + >>> probcons_cline = ProbconsCommandline(input="unaligned.fasta", + ... clustalw=True) + >>> print(probcons_cline) + probcons -clustalw unaligned.fasta + + You would typically run the command line with probcons_cline() or via + the Python subprocess module, as described in the Biopython tutorial. 
+ + Note that PROBCONS will write the alignment to stdout, which you may + want to save to a file and then parse, e.g.:: + + stdout, stderr = probcons_cline() + with open("aligned.aln", "w") as handle: + handle.write(stdout) + from Bio import AlignIO + align = AlignIO.read("aligned.aln", "clustalw") + + Alternatively, to parse the output with AlignIO directly you can + use StringIO to turn the string into a handle:: + + stdout, stderr = probcons_cline() + from io import StringIO + from Bio import AlignIO + align = AlignIO.read(StringIO(stdout), "clustalw") + + """ + + def __init__(self, cmd="probcons", **kwargs): + """Initialize the class.""" + self.parameters = [ + # Note that some options cannot be assigned via properties using the + # original documented option (because hyphens are not valid for names in + # python), e.g. cmdline.pre-training = 3 will not work + # In these cases the shortened option name should be used + # cmdline.pre = 3 + _Switch( + ["-clustalw", "clustalw"], "Use CLUSTALW output format instead of MFA" + ), + _Option( + ["-c", "c", "--consistency", "consistency"], + "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation", + checker_function=lambda x: x in range(0, 6), + equate=False, + ), + _Option( + ["-ir", "--iterative-refinement", "iterative-refinement", "ir"], + "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement", + checker_function=lambda x: x in range(0, 1001), + equate=False, + ), + _Option( + ["-pre", "--pre-training", "pre-training", "pre"], + "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining", + checker_function=lambda x: x in range(0, 21), + equate=False, + ), + _Switch(["-pairs", "pairs"], "Generate all-pairs pairwise alignments"), + _Switch( + ["-viterbi", "viterbi"], + "Use Viterbi algorithm to generate all pairs " + "(automatically enables -pairs)", + ), + _Switch( + ["-verbose", "verbose"], "Report progress while aligning (default: off)" + ), + _Option( + ["-annot", "annot"], + "Write
annotation for multiple alignment to FILENAME", + equate=False, + ), + _Option( + ["-t", "t", "--train", "train"], + "Compute EM transition probabilities, store in FILENAME " + "(default: no training)", + equate=False, + ), + _Switch( + ["-e", "e", "--emissions", "emissions"], + "Also reestimate emission probabilities (default: off)", + ), + _Option( + ["-p", "p", "--paramfile", "paramfile"], + "Read parameters from FILENAME", + equate=False, + ), + _Switch( + ["-a", "--alignment-order", "alignment-order", "a"], + "Print sequences in alignment order rather than input " + "order (default: off)", + ), + # Input file name + _Argument( + ["input"], + "Input file name. Must be multiple FASTA alignment (MFA) format", + filename=True, + is_required=True, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/_TCoffee.py b/code/lib/Bio/Align/Applications/_TCoffee.py new file mode 100644 index 0000000..de337bc --- /dev/null +++ b/code/lib/Bio/Align/Applications/_TCoffee.py @@ -0,0 +1,125 @@ +# Copyright 2009 by Cymon J. Cox and Brad Chapman. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Command line wrapper for the multiple alignment program TCOFFEE.""" + + +from Bio.Application import _Option, _Switch, AbstractCommandline + + +class TCoffeeCommandline(AbstractCommandline): + """Commandline object for the TCoffee alignment program. + + http://www.tcoffee.org/Projects_home_page/t_coffee_home_page.html + + The T-Coffee command line tool has a lot of switches and options. + This wrapper implements a VERY limited number of options - if you + would like to help improve it please get in touch. 
+ + Notes + ----- + Last checked against: Version_6.92 + + References + ---------- + T-Coffee: A novel method for multiple sequence alignments. + Notredame, Higgins, Heringa, JMB,302(205-217) 2000 + + Examples + -------- + To align a FASTA file (unaligned.fasta) with the output in ClustalW + format (file aligned.aln), and otherwise default settings, use: + + >>> from Bio.Align.Applications import TCoffeeCommandline + >>> tcoffee_cline = TCoffeeCommandline(infile="unaligned.fasta", + ... output="clustalw", + ... outfile="aligned.aln") + >>> print(tcoffee_cline) + t_coffee -output clustalw -infile unaligned.fasta -outfile aligned.aln + + You would typically run the command line with tcoffee_cline() or via + the Python subprocess module, as described in the Biopython tutorial. + + """ + + SEQ_TYPES = ["dna", "protein", "dna_protein"] + + def __init__(self, cmd="t_coffee", **kwargs): + """Initialize the class.""" + self.parameters = [ + _Option( + ["-output", "output"], + """Specify the output type. + + One (or more separated by a comma) of: + 'clustalw_aln', 'clustalw', 'gcg', 'msf_aln', + 'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq' + """, + equate=False, + ), + _Option( + ["-infile", "infile"], + "Specify the input file.", + filename=True, + is_required=True, + equate=False, + ), + # Indicates the name of the alignment output by t_coffee. If the + # default is used, the alignment is named .aln + _Option( + ["-outfile", "outfile"], + "Specify the output file. 
Default: .aln", + filename=True, + equate=False, + ), + _Switch( + ["-convert", "convert"], "Specify you want to perform a file conversion" + ), + _Option( + ["-type", "type"], + "Specify the type of sequence being aligned", + checker_function=lambda x: x in self.SEQ_TYPES, + equate=False, + ), + _Option( + ["-outorder", "outorder"], + "Specify the order of sequence to output. " + "Either 'input', 'aligned' or of " + "Fasta file with sequence order", + equate=False, + ), + _Option( + ["-matrix", "matrix"], + "Specify the filename of the substitution matrix to use. " + "Default: blosum62mt", + equate=False, + ), + _Option( + ["-gapopen", "gapopen"], + "Indicates the penalty applied for opening a gap (negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Option( + ["-gapext", "gapext"], + "Indicates the penalty applied for extending a gap (negative integer)", + checker_function=lambda x: isinstance(x, int), + equate=False, + ), + _Switch(["-quiet", "quiet"], "Turn off log output"), + _Option( + ["-mode", "mode"], + "Specifies a special mode: genome, quickaln, dali, 3dcoffee", + equate=False, + ), + ] + AbstractCommandline.__init__(self, cmd, **kwargs) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/Applications/__init__.py b/code/lib/Bio/Align/Applications/__init__.py new file mode 100644 index 0000000..778a7dd --- /dev/null +++ b/code/lib/Bio/Align/Applications/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2009 by Peter Cock & Cymon J. Cox. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Alignment command line tool wrappers (OBSOLETE).
+ +We have decided to remove this module in future, and instead recommend +building your command and invoking it via the subprocess module directly. +""" + +from ._Muscle import MuscleCommandline +from ._Clustalw import ClustalwCommandline +from ._ClustalOmega import ClustalOmegaCommandline +from ._Prank import PrankCommandline +from ._Mafft import MafftCommandline +from ._Dialign import DialignCommandline +from ._Probcons import ProbconsCommandline +from ._TCoffee import TCoffeeCommandline +from ._MSAProbs import MSAProbsCommandline + +# Make this explicit, then they show up in the API docs +__all__ = ( + "MuscleCommandline", + "ClustalwCommandline", + "ClustalOmegaCommandline", + "PrankCommandline", + "MafftCommandline", + "DialignCommandline", + "ProbconsCommandline", + "TCoffeeCommandline", + "MSAProbsCommandline", +) diff --git a/code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_ClustalOmega.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..047e02ad83564b846eb189f6a8cf5b9086859e6c GIT binary patch literal 6898 zcmcIpOLH5?5ymb+0t89DEL*lL+Y|XkI9Tu@+p=j(5ha7MSM;o0fwdI`T*OH-X=FP2JwqfY*-062q3E~DWkv&1#7JDRJTx?48OZpExydo;M%jTFE=VP0Ca2B-VPSvfMHLWCSw@X@ySJz67TD*MDcU{Zl%&|SewgYQ0 z5CQA@0So(rxsemv14l5+v3s5?ypRn7zZY08JLg0)v>bND6+Nr<59m~Cl#^=v${@6T z55U^C9d`Q3BTE}H46II=2_iWiqRxM10B7*Xqi96Q)G#-lYi5~`tGQ@qg^xkkYH}o# zsYH&ji~bEB_nb#3Q`A?A*6R$vei#lGXJ@y!w_DlgmLK$H6-i35>W4xqT7!NHx@^gi zb^4;SDLAuwmhGXxB?5^FFS7a8LTfG^d`)ykAiT~fhasCaThJi3++^?Dj?0#s>^v4l zc{63mw%Pk={eg(twWiW$rGwFq4O3ifvTMS1MG&%; zCR_W%@44;u7#Ljd`|g0ay>q_FF50~wG=>m1voJS5-)Jc|%a~M?$&Te%8!&WFcp|XK zgv{@8(CBvsjWL??2rfUWJyP`Hwv zwSC9$?Xr`Lubg;YnMJ}b+_#lI=I2`Tb0=P#b>&8jRBoMoC0*dRb}SceIbxvPMyF4o zX5GMdiF0d-ytcJ87&vxE%}Fxb9r#$Mdw7!4l33VYyK7@PPO;;Wr<`B#Ru@ZU9WQA7 zC@hHXL6SL_@~d`7ZLw4Ll09R4B=S@yy2-#_A#yoQ^0f_Lic{BvNHm!hg}wqCg-V0K z_QILFrKS`)o*6c~V^+-YQw!$PWQIZfnX#3w_y>N(w*AQAEZiMnH63S{1(BzsL}yP3 
zvBN&gCDE2k=RMP4h$&mPrF5m(a&o}0ms5tk<9gZQp<&PbOaFkQABwG&m}za*T9Ao3a}Hr&L?|i zK2_i}KF#;_biV(#j_(KfL4F9|A1S_<_+kDif9ww25&k%T0=T2#{3Jhy`1=(2Yy4^c z40tic>l^$y;AcVcTtQLd&+`|6eQ|{U3~vDQ(g^08ya|jkf@$$tVCD*p#^?D0Fee~M z<1h190KclFp5(6qew{BWnQweq#;o4tr+_<+(ceN}<7fC1`e)VHbNoEux765Wet}=) zZ)45gDI}HnC4LzcE11V>F%OMj;aB;$u>#lB$TfZ)H1G2FMx?&azXRL{+#JE(;OqP* zMtulw?Sfm0f0tXJ-2k>Tg5_KQ+Xc2ag6(q~*pGnS9Kkx=1=a)BFR&UP@Q;BFl#Y^z zfFn?B6%-}D&3E`NG>VHEYy1;_i+>8-_tXk~#y{uZNACx!cbk6!Ej;*Qli^kc_AA_}EIGlZ()6dxI!sM0I_+p(izhJj=xx1ZOK0#HH`L#Bbu_$_lm= z=*gfN>)P^XbhX-qHAGY7gbPgv^p9>2mRO(l&c8l4cXEBnvpT-VtrP3Q+mh>HcYgh{y|FIsP#6QNvuX8&Tt_sVU83YR zJDS{P+vn0+M-@n|!ERD})3MwQZk@hD`qM;oEnXO<8Y!b%ZMcISw%e^Y9Ul>1o(7N( zZ|K1U1}6v7>L+M-Bdw`GePo@d=unQT-D!4Kv+ z(@7Z`Bzo6M##B|2l%YdXMMHWdWvG#qp++)}2C5`uN|QvFVKO1ZOqrw%U6OG$&_{Vx zLkp&I?@Ik_n!5YY+{BZ+0IUY{gjhZV1+$*PFn_#s)v|+aTM9-nu<#^ zFT!}FrcVqq>37M66|#jgBr=panf0g>H>BSU;Tt=)8@Y5=C=vo}LRDQpR_@snRGX<_ zN(bE#;WX54>jpY|x}=Os_CDVHEAnPM7#ZxxD?U&t3#%H+f0ceD{D)17|FM17I;+xC5;WLk(BeGQle9)vj9UOqUf`il?qUI554pZ|eHIGqqgqp{xd4d|cSqPq_ z<`^}UfA8a3zs2V~P$Db@LfRZ>+{oR)UPFQc!t>4Pz4({m^vc0-C6Xm3br@qA8B53C zVMhcHx^hV>)W$|iLz^iZB~b=}n^3niij+22YKsTnR>4y=>8I~M=|9q>@sud`rUDE< z;0QRAX-{?H#)vN{3^d!MD2)H`kJ(|2-J{+;ODH{_R0C64aa{#`#rW&2^ru2$Dnarn zCSxqAij(ybTx(Ssj}~?YxoQKqEu-mR7Uge)4e9r&e!qbAKr8#_LeZm!fp|HZ*$TDI z$MK~-OgTek;ulu37(4Wig6{vO^j`|452s42^TSZxL!_=cKoq!`-L0rPR}>TvcU}Kl zsEGUsb+9}Lzve^%#?R&o;kH6mf5R0l0`F;;w1Ti>@DVhsS+A8ZS{&q*rO#%I<<$b5BEE7g$)#YuLlGvngU4@z19UnzYMr2M=ubuVc7gG*|%P*V1 znhjDDn0Ew@6!eOZEUR;ts@QXLqBd%+HpT)9$N`16q&~24oiDISWk7#oNNV{p5ZCz? 
zD-wMFT?&Jn|?VmB)0guIczUuIpMYgQoD#;k%)F8Pr+@ z^y3v8SwBSmoX6KeYFs*+#u692NqWXptAPJdCw!&;LDC}~I~+xtqZG;`A9kRIp7fjXLh)uDS870Ec3urWp!pzE77^*(sdk%g z;V-8bU3Y)50?*oR{FH2 HP3ivy7sH%Y literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Clustalw.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cfed8333148e866e3a5d015df29db62a4175557 GIT binary patch literal 14200 zcmcgz33waFb;b?wkSt49Vo6T2mhFhN2}q(H+mbBVAPJGMNB{;vQ{E^+mUv5WC14lc zU5Fy1oObTmxfAzro47}sG(FNIY3rs@6FpcGjC?z%q*t1Y#EN=@Ak-OGf|F2zQh^-F9GBpfK@_55jjFpinXJ16wi1& z)``pUPC`y}l5#Q{nT=3_k`G5HsU{za%PBQYsWUP2UdX1~na+?r6pg4u4@II8iq#_H znT@Gwqtj7zlG>WCN^7Rl?W(4s!E+++t#{mRZ`koOYf+zc`S40M>3KY6O*v z$T5n_aY|8~5)VgZR0oEpogLl_*Fvh48p>~4fu?KWnlyG+H7(dB8K@%HY|R+Y4G&BB zTea=(;r;v9*4A>Kc+N0e`(0+quvJTlW%*BF^NM9l%~iE|MkPsUDVmPFr{6j(9n9T% ztHjMYnLFqzsj4fgsp`$Jrm#BmjHXKq`3Y&Ntu)VAtD4#-OPWRAG}d&fRGE;bS5;kC zr0RsU(419mTWcx0RLM^WgBF@gTDz^GN9HD^`;|8O2nvr+NJ~l^8da&d09DL_9 zJ&+UTOlgLu!+ko6tu?I)Y4TRUq1*1rBDvFR^ujr%1E&f#t>3!GjvbR$Ors+~O)k$q zm&Wa6O}EEKX0J-VQujq;5SB(GcijSZ&o)d=X?q>yWc(L3V;GnG#{+c%Z~|bROh+QNn1^_fwlG97 zT+@%lLCeq(4buo{qXBITZKbPdTZk5=t7$v!pld+e`B2P`(sSs!bS>>dez(Z)q3h^* z^!yL*-7;JS|WO^s;8uIm^-ivfUEzwEP9w0d=8=)7|DS8RL6!ZqYEJRmmnVLkPtF#iLw`i3#It}_6 zYKQ0@(#fDM=ns+^qFZEBkIsU=CTu%L>$E{HNB%s$B9M#HE9q7AYI+UmuN86+(ZlpQ zdOh-Q25-spgdU~0(c9@Apudye6{5eJ-b3%D_ksQxeM^Y`t@Lg5 z?esY4-$CCQqQ9R$Ku^$jf&Sg}Jt6w{(g*4L=tH1?n7&`oQ}mHX66nbvpdX|kq8|q5 zqx2&KoFAnhqaUY_f%9?tiIA?Jq@SWE>8C;e1pQ2i{G>DR#dF#Y-f=X3NM^qcfs;Cz&Rdw}yh^t<$V`aN(yPG1PU`S;18 zKcGJZ{S)-XfF7kkqCciTp+5!v&qR-WiT<4ag8mZuzoNem$$gprhW?iR4)ni=T$KKS z{*nHPzJmNei~PUPztX?aSCRjBq31v7Kj~}qU&#MA{ZF7LLjOzu_jW9~fM_d$GzN(G z`N%Of2}l9bLM8)jXh3Edlo7xvV2hC13hb&tCW_oPz}0~5fE_~S8nzU6o(s5E zaCWiH$n6H~0bB=op5Q#+?E5BY?XAM+N7YSF5{0 zxd)I3ObO03u$iE>xaI}GalkA9a}UxYpg;ex%n86eU;%)62x%Em8PIbdC{;iWP!}?b z!0sQASpwxG-~oUvWL^yH)PT%OKzS*k0eG2^QGhKE$TUGAfC^XrI{@zlybJJd 
zA@d$!@AYL`#q*Iy%#NQfq}(85V_Q-0_H4;gAMA;sGglvvIvEaLbfu$i3}q1*aPA<` zO0~>74$7H67O;0!TtQ13GZ3AKNQyTAc^RYlt0z1*}}D7t5uZ zlMWD{C(UaJqB$ZNh;*nK0JT~*s%Jmw1X6*v&Gj6{b(fN%3%!E_Eoo(rdScs-f)h#rb*hhD;v|*t_s1ZdxWtTmaVUHd=&VA({uz z{Csh?BqzngNxIZMGlpB}>vvN*RLM3~6_0wgP;e4%DoER>m3Fh&R%}%&-77IEw~@$p zwGPrsaY2rYg!6ZKmSN}C;PX2M-)L zFrI6|v&A1jIiRcoEZ+Irk@FEd`VenPqowhfGpt#>u~M*Ya^i@v;lt5$ABvbeAp13W z`tYem3vO}hwAMLz2pg1Bd0jzgkaFuORX=N;vR4kCn%9<3S(>e8yVzDKE!8@OaCd*6 z_Z0iXdy4&yX>XsH>#jSQyV^=;nUrG>uxWtEM$YGlHlDdgL&E@VG;(*f4YaFu42WA0 zrfzfhhh}4?r#E@y0`G@;*d^M)rpu-K3)Q-u_7ErS^KSS0kzH9E9L5~7D-vldVlxRR zkRZXZy`iaXMmi?w&6?){TwTe zC*-uJK#rrhoW{IitD5e_NmoQ)k4MezC?s$s!QRbc|b+zkbl#k!nwpH9kS%zA3vemyKr&o9>M`S~#nA_WIA;}g>C zG!6!4X9^QiW$uCbBF+XXMeOEFg_Dalu`}m>&0k*YZ-ocCFOcKVi)5zYq+Go`xm2B` z&?_f}QaSDI3FUaJ*+N39lj^Rnx3#Vu?`o!#)YWr6OHPWt_N99772JF57_lfi4fQun zQca*}l5%yXP?a-%EMVX0HIS{GcoqQp|{t^ zFWX|F!%6n|j80Ap)lRZ)tf}UutbHsPE$J-0R5vWRVXbgqsa%s&{?lXJP(FT~EiRW_ zbG`ZR8!_35l}kJR(f?Gs139 zZePgwQei18rZ$|a_}05)%eve)H}}CFbBvR5PHyIeSD}l{OizIA&P=JSYkVpz%yyFA z!==vFydX^ydC4|#{)!b#!op|b5U(Y5nAW*P&q;1nheI5!)67v@IceYIZ4)@2+lTqT zqntw{AjSi}kUxpmnJ<*&p%B`axM@K-r*(QAOj7JRd>Ryv7$sT*&%oTN*Ysw(wxW{DNiGO)sVql8-l>n)5%Ikb*#;faZ^ zUXzpJ;Urz^P4h-evW!mnz?wOTFL4^lttsa6boPwh!wDqDz=`@|g~Mq7=`n_uFuO<= z!tV<=1TRB=(9+#5u$Ujd#QRO+Bv;yog79V2W$ekIWrni0(Iri3S(pav<;qMkKUX(3;)7EfZY7hnPVkRM#Fsak8*_9CdGQ}gB8 zBCa4Rc_g#tQmIg%kcuqd(_z)nZsA3GoUK-7&KGfv9Y>%^hg zNx6ERSaHTlEw>G;*Ky)qrQ@VphOVo&6VnLyQ!&GG5?#e!byDg%ObOOS#^vX^4N^!V z(O5J!JRUQj1Ki{g7Hd&=4H2r>ETd=^M6X`Uj0|MLa<_1VJlHnH#VOqGxIx&`Gi#BA z6Nfe@<7>mVskdx$1;*p%5jOoUPL48R^Vx{6@7ouJEc7 zLi-Xscr>vruTEEDRDFd_1w`PN!VLACEnF0K7EadLMSUz_?_?t(r|RN5@Jg8Z)xgXQ zwkd03kCwqMWl8MN(gEW0vR;TA>>ES?11|1`nx=}=0Fpv`vrM*Ush-lsTESHsAU^N8 z$9cQRLyh<6{o(@yk2mEtww8UI2i$3IJN`8M2d7wGr=$^+@ZkfN32$Vi{HMzpc=zQE zdh=FTX5Plhi#WNRlRG%MlaoW797ghVJU-19_s2Ory+5*tv;A>B6hZ@ur(cJJd5o>R zo0EGudAcKXH`~i|Ge&48>yOV&)*qee0P%R6+jtrA%cYjKy`lcb`*;YZ*%TgtDgWs* zo>e>c1$Jbzs!n(#pEwpuGq9m1rvt?2?JgM-a0!AqwBUnAKBnP$E9bT 
z&T0Y#B?NER?AI4{>Aa{kuWVuYWP%F+X7Hab7U~WKk zr#RzX4Jm~dKG4R-gtW6juvwWY-Y;XCaGxGy`%Uh7W=>3mq_&l3H(EOywD2g7W)dJgI)_IikgdAmPQnP8>4 zz_D|G-*%A@sTOK%9s6gFbuBb@2?ssw8Z~(+g!UzN&uEsijAa-bb#XP*cMjCjyDuY(#{%~L&Ekzv zn7~1U_YT(QmF3GHPa&>+`dGleIjow`y}-G!R#xT3D*OU3eu2S#EMVWbNh~9#2j9wp zE=PwwfgsaOD8uj$I{7fx}^^WmJag)ntKZf|HPTlVxcsRX?iF? zouR(Op_&lpOGkv73X$LClYz&&gUY)n zhv|kbp`D;vT-%uIe6H1N+C5X{n8x2gU=L}X33sf-FYr}OhF&(=e;Lre#4~yszrd{* z&Y^(2mBG0*Hz8M1pnZwMAT3VU7pr12UC7r?@T59buFvt)oowA>-+?O#9=ii>u<3-G z!HfM@I0=4%oM83ll$e2oHgpo)aB^bVuvg7#@Gdp^KFUtxO};~X*Q%Z4=wOKNTZ<<- zKFIX3fPKSksH=E@3l0wNOjYt@gn^0$lkYd_RO<1tuk#|zkso&yug)C%iT3hQG><27 zW7x;oaF=xjEq@}gJdf`&+R-ikO|9O|5VEZFO^&|?O8SdBZ2uNu3sbU*Yu4M*R!!PD3;`5 z;oBZAeApE_)IVqS-5H1Fh^bZ2({Y}>EG|jKwj8JPe#I|brEfIk_-Igv(Vz}IBaH@? z;E8C&uZ76?)nK{97pMp5SH2=23suA&a&AN8az~6f)#=u8f4x^YqhFxhlCA!Hn_ln4jV{cwiiCzN|U1(!J&jcnwSNcH5MIIx+pUc|9Wu zR%z}9dZ}gX(_9g68B1d6Rta~qwL)P=PH^fZ1ZhtV4F7u-p8fkDtZZ_L@{JDM&jt;P z8z*d4Oa=Sg6UBwaY92RE-qT}TyNGz`gb>;@c7a+{!jA3v;e}F;uHz!W9=o zJ{GX|6k9qkzFi~Ofb!=JTr`L7RZ*JRHWuLinT#J-=9)UAWB8B@*TwJWx)GB4$zGy>6>>$q!Y&=$ly@ zPnz5kCWk@JaR2i*lYiplWc+W6a8m(CZLc`-W`~^AnKi|1S)wuAgo7uuf@A_asis)^mPw!XBmL^PPe45!G9(OxbAzQ zV}R<*Uf`Jkjq9^Ds8QU7Mo|>}0Un{Cc;OfD#_JZv3r{KjL7wJZ+;SABp+R5}$=6^2{&jE>>eIyJA}se2QhiE3rN zB5I=k%ZjMWi3hdLq?{5Hdq#8}{U_aN?@Z@RwIa_vs8lOrYP-^$PS@4~&*Q#euH#E~ z5b-dS5wnAc#RJK_#EqTMm5jSi-}j^+voH$!5%<{DjrQio`qolw$^0BjRhsqTRQF~W zJApqu(``rngu3J2_JI>ygW=Qdo{A%G#rcFZpG7YgpBjYs@Kw*Fk(Evr>oi2QQxlVV zP1R0aOhD(U;Zs)~txstr{wKWte^|=Q48vy-$KlGtLeJ?rN=Ex;EUkgf_sqn1T0KX) z(w43;gK3dx3DrVL|T6wOt1*ldL@ znM>KLw=sMqeXIQbc#tb5yAecviFak>v$>_k#W$K}u^lVyMzLjAx7v(0_ErQ{9G`8= zdkKxIRcVj*lgI3ebH&-Fb+WC*a;0Ojw&e)LZc8P3WDVHz(mcDnyxh9G{Fa&Nc$+RS#~KXZw%w@eDth81{|!7B#Z$ATmdlUR3C%;#P{Xwm&O z=J=MI2*>Y_hAh3p=8BFsZ_cws$(2mQ)zwvIM}ddc1*TrE*$zY3u{gP?VvZLEQOr)e zlC}@Moo{Z}(xGwrAzkMjAF5xzaAI*6=Jgn+X_7O2*~j)TU&z;RVV>Fi@=lb<=17Y$ za{PGim@e7F{1yU`wQP>BpBWy$bwZ!aARh+_I|ve2u=ps18C>^>MTxKX0M1(i0os{k z6V5dm0Ab(ZS}OvETSxIA@R>?_zyo-yVqPE;SI#r8n2^efoE{W`+fi?9L72S|CvgxV 
zXmU6DVrIMw%Rt=Ihk)QUd_TljP2%qT>itT$5?2ZI`i7`IFdzdAAWv#}N=!pOqvaX# z1msyQpA}z&d``>f#gmYq((=>d8OYCS`Rn2vke`EWh;NGLA-^DA6yN%+hB>}1zJqI~ z`@JMCKz>=vuZUM6e^<*5F$cM+<=4dPkmn&Aq9sho3t~}R1RTF8mOiUv#AWdY?!Kve zza`#={En926)TXxr{zoHGUQb)e_y-@xvk|ZVh!?DEngGYA+JL=#QS0c@&_ZWKNOp| zzM=bViJOpbY551@HsozB?}$5)KhpAD@k7WTYq=wS1bJ7>KNdfM+|}|YfHiRVv zWGU=pE9;8^?mD{nr(zGXt9yIGha8CTSnqox!d-jCM2_`Lu)Rho=PlF36tCnrW& z>X-*qvPaK#?W1d3?8?Ua`nB7b5*F~u0=|`QwtZ_5klG`aO(6Nb#EHl(nZlQpyw@dQ zUN!Mf*%-LQ+bj%#IxuamWvQWIskFCYGbMA|kh;j_zH(t}G0*%UW`PgIDcBo#d8`N< zsE5dw%v}{^57#2c+8aHC0o@?b=`hxl9dr!qAbq|4@ZAHPtZm*wQrTp8X;(b1-x5$! zsPu0Q;q&pTnF=LgVc1|TXy~`b8Z$r7a~yf@#5TlyPx=6^7vHh1BUv%IWbzz@Q6KyE54}O{*|3(;2v&XvsX-X5?;34yPc%N%x)jh!$`2p<(m1 z@#w|p33aq^+r}Gq`q#qgb*hDj+fl!~r;eZ53gN(b58-ga)#CwHLVFt0(x4dW32zCd z1?`|JSe=JJ3b_*253rB(BKvpSrFOT+u%`gRa@YG~5%lpTVwYzHd{BJc=}52xb&%ZzYh z@q`hCJht*Mx$@FaPED@7|7~&`s=8x;hP9l7q2U~)f4K<=ae2rNq|+Z@6O^XduvcK) zY_4~N3~Y1oz^QRoMQwn z8qhp`M1cTe5x{LkVqyGo4{VdGJ-ps>`>Inzlm5Kn20&i0*uVqb94SDd3NF$^ZJ@Ll z+JWl^2ZYj@9%_ZPJU-7_@gQPgr7hRvhirKflJWo#??3>sqqS{ng0K)R1xZj7B4Pp3 z(4!l7CiGAG_Z*~gl_6FZ;K&F$=xOJNfs_K-q4go^VTe$|JVIjm`_g4|5ywp`osbY3 zo^x0>1O^5-8qwYcDbUE5rHzeTgE*C-fJE)HL8z0qF+nW$68MvY2KoV~3D>MZljBJC zu~Bf!32l^4olKMd<_14>yu=%!98~rDAb6MAA!Bp7qpaZKGJ9i@XdX!ymlpN#jWK36 zMk6R_yNx74N*Yq`PDaD()>bDIKd4^$3C$fCqEP>m93jIE`U&VVDw_9R%x zcy)-wk+xp2c|57l~tA{dA)a<*rsPN?qL^h zyw{m7r1Vw;qJ|85fM|g=k>p`ZGLudo*a#XWjuOOnv7=yw5`{j+D&+zs*gLn@Z?u~x zP2>2*G`W;sfLr;Gorcn@7sNQ;3gYx!1Fd5hfT^D;8_$%TzPv5LT#(j_jn=_~jl8ja zeMA$DmOrfe>H8a0vX7L`Qr^Qo6vrVTtvSu5{lt<+2BrYP@s{Dosr%K3)REXYF>Q^ppZISEhf#rdCZgZ(;G_{Um^>IV;LA+2rdoSGgG4# z468wU`8qLYq%@**^F%l4;0hi#@_U>qAcLT;QUK}!)kVTVI>FJHX8i%kP;ij`?j#wK zL`De<=B|7dB2}p$<|qU1`3aEkHUR|5G#qu4*6&WXD_lZj&*SKRHkoGE|PT0 zBYjlv#Qt24L&U81RYc@V)GuGGLx>41%$OJgADkp$Z~+tph&56pl3rd!V@h32lL-jU ziH&vY)TV#Dkq5eRJO|T)46=RhCK-dVod|w_?I*<7EQ`&-RX}PGag@uN1j*yLE7ez6 zu0Giuc?|hj=>mn!s7Tet*l3i3zCEfKn`}{;r-l>yC;fdvqWr|`!3+q8(^*F7qeVD| 
zkbOczbkd9f)*LOHdz9`A#6+=Jc2NYgo=$It##v%f*_0rGGCrUjv8XZR-Fa9=1Fdaf1|raC z08tK8IozIlN2hZtOu^HNl3+f=BcjTbq$y3-(4^;g25_rEfD_LhqPxc}^*GKMog*8Z zNAno-{A*;Mdh5`}oM_U1Lmtv;bt%ac9=i^FyqrFOB>KomFU58U?u#OP>*kKekkaAP zX-YSo&_~bdt6A>R36Dx<{Br=Qnn1IBRCSH>vi4M%I$1bUriLhbcJu$zua>|_sd4mvQe8X`jy8m%lZ4W)$`SS{OnF?lzLgS z$xIimgC0G9OZTU{W4-Fo(x}eX(ia|68*IUirm+6z1euS_-0co$y4~!|9P(_pOJ#EY zWUAX0fz|CsWWAC_sp5%rcuqo^#aGd8O}|(-BDzC^6CQWF2=6C=$TSSnO1#yaiC#yC z&N=p=K4dvqVu6E$bIx~e``53x96a03YN)R} z&cDsa?5Sh&8HRd_7jp&Vj4y&>H-yK%xjn}oo5?^MG zQ%@d%ekp3kD}xo+;VZ8l*I~EaQ@lQc3iLlQ@sG$RbV@ z|0?H6$O(XxCw4|Y)-fON6&B57I!{jpz^#NYo8Msi_WA32FmaJ;-pw1c)eYw0fyLo{X{VUG@ zPnBi|I;|GLGtoNR+1!j3g)UG)rMV0_bRY9h!gWP-lxnUlxo;jjJETgJaKgh=&Ilb- zk>LA`O9fSTNVjvp^J#VXB_DBV?6$z+ip>{kG7cv+C5Iy4Ap2Zu5s`j4i3QUeuo{>0ZJW|BKo>AOt zAhLW$c@mlrlBG&>Dc}aiG;}B;5OKBEA^Rc~32YQoEkdWxg-coSdh5#E8DgV45hp zATm#EY=++pT%jir2Enx6CP3{>P%DfH;ujZs0!URp1eS0`W$@1rJ&I?TFT~3iO@r6n#aWITNr=AQ2)Y9+r-3bV$L7+qLeEM~bT)*%xbbF$t) ze)Rm+F&SRK$RmnO+CDyLdu7v*7z$+PuX3t+vFayz9O#MUl&L`rAJchpuy_O3hse$8 z2=`f1k|V@{BEmQY(DGa;j=MAuHK72{hN$tG&;>T;G5-d%K87;oMmv{|c3*o?-#vnG zEul;@qexk04L;mCD z(bmbK7@jDhxu4PSl#aPNflF=L2;W?c@XcVB%sSac*?bVuc*yA9wh;}(DLS+Lh1l-| zfdEE9(0LH0hzGTYNp-O84sKRCSq8H_2H0i+HfYXf#XZ=KJUo2WKkgm=%>=at&Zaa( z0TLgDK}Y1E2Le+HVMq8tSYCswN?<<;FN#NoNtZm>B`+Vp=$rpf$odrM9dfUY%&HVu zhS90RcXx_!q3jsUE7Bn_V5Ht=eD(!eRN znYk-h&D>yCy-bYC8+iR0qcU#I7qWQQ_W_hd#y#V{`U=c~chYO((#jkmhp z?t;==-9@eEJQg_r6#9z)z=Qu!eIL#B_58DP86|p{DqcV$TU?M2$I>Lk^NFb=wmyht zyr5D#8nufhpC(DF%ZB|cK0cSw4Pl2hTdZumG;ftG7uzA}lpH)ce0dW=4}_sVY1MASsDV|5cEg}G{Ls<;cETL3$64`f7+Q? 
z{Y{Hfd{}XZ=g+zpWZy5iBOtc5Wvmrl>A9IGEX=zBQQ#!!2BId#u%x*mQ@9tQqf_P@UH zW%gU%s-cNN7@V5R|PTsLCSdt3iOw98GtYgMg)B z5J)57g3va;Y-lZE!-SkNwrJj|d(x~J<}rH1AV3zqhO%wgpqbQ1?Uwuq1cMv4_qpnN xtQi-qYK&-E$vcL;WypgZU80i2Y+Xh8ToxqKN{v0Eq0RAi@BP}%r%iX&`wnHehpzwt literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Mafft.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7dc9b772f51bb5f4bdab9c739938317cee383a7 GIT binary patch literal 10327 zcmcIq*?Sw;b;m|P=mlQnCA;X9^paE7%T7hFI8*vmv2eS< z%e?Zv0t@f>3dyW4DM}>N2G~Ky72rbVA zuzP5QW`8u>Y5Tp19vZ#Y-VZ|GFvElz5i{w*@n0FhReXXk;~)xp5ki)DQ7`kV6sV|I z_!JnP9-ZGZy4^e;;~ez%Ii*Nsn44qx>xbcBZDB!*+Sqse#=$TO`jHkj472fQu;38k zg~02EhlVc})I-}|C<6JoMt>68G=h+sePJF5&WxU6xj26;`~d1%QzY5r<;F^DWuMg+ zo6XgQ*5X2Qsje{I6J6m8*UY2MaKC4SUVonLd3{#9wsoCdvpma%xf~<3%z&+~Ew&ct z8IWGCvkeEvdQ3c+W(4gInb(yJre<}`d)w`tarcMJa!t=4cs}sCJqD{qW{8JvTu8`x zF5u9Um<%P3wr>QW(}iP*L(e~8H7)|vxAsNAh(Zoc=1GnS`yLMxu_pxcEzt$}0EAAF z@ba>@xO#ak8RYOt5%yT|0R;Y*OhUJ{jqP2{nrE8|fW|nxsbGnRo(o0K632XL%L0f>+9>vN(jJ4o3dl0Js8-QiMa7x zc%Oq93fb{wCHbXiWisiABf;Kf7Z$uhxL|vxVXGu_A-&CVJ6#L;7dJH;3pYG-K_o2U znaB+-qtQjSF|sEVVpFE`cTdo-rF7>JTZ1TkH=$5ZNe+BOcXFRZqb?oikv)?imC81{ zY8Roz$@VSR@R46&SXV9&j1WmI91fr~+aAg_6M2NL8|=Po9U*TFZ6P`Huq`DiX&8jU zsnhK)8+sArp8NHXxdI`9pb8?GK_P>7%_ubXp+8bC8D>o}_J6hBOE=4l*+Lrec?|F& z)N|-X$TFFl{Ya_boY?r0lM|17E zn%XgjlA%Kbl?dHoJjQb#+(kwSB?jph1H%u%nP~JHYl+EehaV%vxA1)gRW4X+%FXFE zlrMb-hjVKel3bev**v>&c%e=L_d&`QYmymyg#Of8Lkd$#2{0~#@!Tw zBm|C<^@cADehPUN>`8WN+oAAX_%(LD;XLU<$}JTM#UZ+tC<#u+Enk=+f`g*QOm?IQ z0k<0>=6f4=2?aKLWkO_CssL!Khkg@*UtF0|@J#M8sZ{XKrI<06v6^9+3C)bAq-D(# zs!94QaX!P zvHI)$t%=pI^BPv`{5Nu|1^zal$EqgjHTVMHri5F35%7|PFY-%(mnFQyF9Tkc@EZRH z;CCeaE?)ljxJ;0mt$t}JO_%7ecJz3**0C|xP=A&;^0+fwEy z{C5CZsvKbP=d@V^B7SAdKBXZ)`L|4qtMiT`b{$o~%KA^&@DUE=?M^Pl7FA3rJg zv?qm5IjlUIdQx~&?3SLCeo{oI_fKe?{yF~QhHIf-u&r37TGKXyprMzRntJKtCB3w~ zI4WIk##K!-kg`V=I>v9eag@D<+@b?vBZE*mPX*Vo!ET7IfgTaHTDK9;Y1* 
zu-lE#Xqv3hB*EDfjQ3uQ-mZhqMSB3L(Xye=8CU2hsr7_wbQQG}jKA~z)X+XqMVXPv zga$rTQ{)L~Kf36&1nU_CXa{|24Z8~T5ZSD?x_Y^Ok}sKP6<;YBKX`t=Znby!+N`!C zut!s+hdq1K8jvzttCvp@oYk~LVfFfkC?*D#peeL1>0FL|ck>#{jo6VJnZPx&U+*o=j+{eMgu-kgLY3)A@ zQ2b~E!#ps0B6x@zVWCa^#6rHGSmFr(hO^I&^`8=F_!Q!|<;C1=rP1kF zi1tpW@s4zYU>$&Z>Q=p^pI5^PztfFeQx0vaXp7;K@rNtDlz z7D*Y|!SwOFJue1hn7Sfnb@rY z)@sf-n@v_Tj0FUP+;>syZ3mwgo3I#dw0tne zsn2C_e0vYV?j!@QOhQsO%$kJ#IO35wQ#qtOf_rSi(eX?*8%5|SIu zFn|Hh2K;XrKVMmyCs38dr-vhkMfnQKXCyr1H(QAjX1oN^~ zN@Y;eoSL_D09r<@TBmA!a-FwFS2G%vJHBca|(NB ztG#z`FmPRw}Bf3JgPW ztIv#~ctsUZC}w<(%f5iKcAHT-D61$b-6^tA6V=6VT-bV5eaB)WeXFX9#CChZjZ8U@ z@X-HJhCcp&RYt`mF;?62BOyogRMFOM88(f%!5j@?>Qt!T_89iQ8v_ti@Ol-cSf|8;B``WY+e_)rMi6|Pt1y{-7-uAos{;s)~NAU%@~ z3gzn|4Tg}Hl2@VFgb3qyz3NjO`?P=h^wX=+99lv=j$u;ZPtn56c)(UKll*pfR5l$9 zZVvF%pk6TEV828N#L#30JdYsZ=diD|9ZQ zvV?F}JqA_eJ(Vf>q&iM#SSkuuc_ksH!3BcRtUL+UwU;qsHo219Xp}H zPs76P`kenJZqU!kUy5YE5mD|X9gQ$azmll@uhH#ubbBWXEj#evqU#wO5+b+4ELe%L YP7(%GK~>7Hm(PEu&e6wj9a7i)#Mi~s-t literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Muscle.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fc62ffc87a88ab913bbef41001fc11731731ce4 GIT binary patch literal 11786 zcmc&)33waFb;b>lA}NZtEFZExlI@s?2~vb)*;Z{?Bt=O~NKp|eE8a3i7nlLCBCreZ zE_iIzCav7cO6xT4eWZ8VCcV--z1yUB)B7%ZH@(vPOxx6L`oEdQ0!vb&EW2OH54&&Q z%)I$$=FOWoZ)SRMus?#oqmhBK_Kry8BlOYvOMp0mpK}Ti9?>E^%479tBc{a~aV_3R zXo+a#bcDxw;`sfGg_cvUiylGxOX!YQ3BA9c;jiYp&0`Z2Yin!c+%{IlqvK_(G0~6}PN*zrEmt@) zbB_KbHLp7^E7wGMMR2B9b;HE>s<0jO!7+At{IM}svJFwGZv?4_#~)Yn&Wnn$g<0+u z+Lb)ZtGYeL&RZp6yKH8hjph#>I+7jpd;C~;P#p1Gi^TMd4!XX^YDTptLF(ml3nKss z7+~UJP;IMKtu+|&|RbK zut!HHv&Y!vlTHQ z;Ha<~`Jz}C>?vj+oH@39&e!-;25MZEGHE6_%PK2|b^P#T(RlpmnqeN!kE7(lK$>9| zD=5|xwXnfXTIH51-ZJH^5`4S@WzxHatd@KIQr9Wt!!FBd{M6R=jdt5L#tah*@RW~j zjG?}#I(23P=uO)&-A*wnSEb!zm#~~+GAet+K43Wj*_v%rqb0^?trlCeT6O5sMib*( 
z$5gXhrnFm7ryiq5bE3C@Mzaj|ZPn1F9I|ugHlU>@b6O>sH_S_iH7wq$i!r7l1 z8zm?$T0ps>T1{2JZMl|h=yhK$+bLB|IR=YHe`Mko@Y{u-^Kue)8#bHAtMO~F&^!r4 zm*$yj@>=wTDDUI_)!4qswHV*U2lyZ#;=B29HFYh{_wW&Z8_Fd4?R+o4gYV;aqD+F{ z#qZ|#@csN=ejoCs+z7v)Gk%~N=Lh)%{6YTEiwW>v#~-f7_-Me1@+^NnIFDS3xlumG zbD+mTNB9Ik1bQ;a8R2<;7@Q+LoJaXla3152^Cz&%Jjjp9IzEX%3GP$;c!>KnKLPI4 z)kr1Er;(Bg^3U*-NST!>r$Q;G`5C0l@i$0G5BT!m$mhXb;Acbf7x_7G-*hz!`R7AY z3VaDE7p}&T@=Pe@B7ZYdp5E0jZ$3i{V1{pwzse~bS%=-&Yy;lInTIDf?d7@R-p;ruE82snS%!})Xm7vTJ559hD=N5T2)9?swJzXj*-0#20wJ^u%A z{!ya(C;rc%|3%XO%Kr`Y$0Ysl{69edr=&m5{|ofXlKyZ0KcGKB^t<5j;TNsO@QnGK zIOv4rBz;bb?~>nX|2u;meUj7fb9R9y-M0{Q@~p*S@tEkL55k?(kl&N zA1xl$GDPGL6%Pm05l|(=l{}g42(k?{%dWwTD@Fqjpw_3p>Q=QKbDCPL>A=y(d%dZ} zoo0m|4K3AlM2lP6P{ZJL%d85=g>Pokx2dmI>q4(^!*R7VmPl6^rWUtNy&dPKuBA+J zC9QTs$9mXKn3gHDOu62IS5sfpGBsfuD^{hV^>?UdNsCvVvX+5AD#V(;;l<$|v}5=s zrE}s9&5@77$aymLhFCh4jkPnKuXYNaukH#jLGD63j)%8p1#vyMaUWW7ud&7}Hddx`MwIVWs#NtsTYAYyeLZy+(u zNiB|t_xe+Yjm>~;N6-MdrP{t&o{HtYTtSpAK-{L>b`=igbhBKuZ1SH;Lk2^i-s={i zIE_|A`qxNjCE>0KVZ!I8QgFEIt%m88p_1~C-sG(m@N;Mr>P9X{+~}1UHek`S==^@e zp*;?Yi#G7O*=Reu{=SHP8}feq&G73kIN1FxUosk#PaHb**z%OAQ*-pg%feiBmfgza z^1M-6b_`eK0C`1+f-S?7nwTOdeWKS%pC~TK?bmp7qn&xOt~W|tKVBtZ_(i-imFn;Q z$l)C?7SUR_SR8+{Zow0Dj)PFevN3H~ZPV;xrDbAPSf)KhV$!)Xym(=^Q|j$KG$X%R zQ^)>RkZ$CdQejS=n$wE*?(S&4`es%onDHSHH_~)DZuTBjc7suSVH1{yZdYLvB^E_{6DJL)tlQY0 z)C}w=baR6>EvUcThUzX8CU)KbX8>Oc0QRG^)V!V4`U28pfG1}&)Jk_+wvZO;RqE8f zlXHcosk1Y)#f90a!iDp*h2k7kY;w~`zIz*R`7XHpj^J)ZrP|j4m)`{Ly*=QndQ+^s z7%8nUAice(^(K>S3ihxj7O1+r@e1{8A8NUQe!V{ck;$QE3zcYTpYRv~AQp4V76vXC z7$;`78VDP`wpcz8U`gd_8{D?EqRhLet|Y*E3+ zfCBBI9=%E5czAL84YM;#bI;5c3p0ywV>bB+Cj*d!IaL#4|5ysOf!;6g-g)Vw(o|9D za+U0q2myQ5YB?W3O66<}^Q4xPFYn<6bp#_jiVO%6Rd0o6$t~?6_AH&Boh_bUTwH>~ zw{&Ll+WM$|$KpswzrpZ$-hDmK>bIYdZ7Yi2_j^>q~BA|fi z0!}IrQ1A(Fu+TZcBYsF7Zmy;6p~CE&F3gg@z4Yw4En524pwYv*o~d3IMqS&ziQzrc zF{-P2y(LIx{Z^`JMr~I`;EjZ!?yR{xtUJ#KFb92@oynvsRA1i17nQdu^_bvh^5MmIBISE!c$6%pCb_pH0_4FJ;DW@e62}{bCcVGr*lGug0 z%zIy3)9RG1ia>5{hpvTsT>(h7zz3E4isoDIxE(U@`^GJ@({JVh)!a&BEu%C 
zU|to>?_4ks%}3*?8ZGS;-UAolT2*1h%}|{5E`2EIxely+2Uh;3VBL}>V_Pj* zI{=bEyl72}Xrlw@AeTa*uuC$wGc|v)vke%P7Xx(v(4N)c#|k?OuO6jwOT#F; zT318)(Ja^{V`MGoG9XPg#X0KwxsZA1q zpoAmKIo{}X%BkQmh3-4W5Fr^9oTxKVo>$i)XCkTl;m_I z5#b#PN!n65`HkBOqRy0pd;)e;>W1vj8-hSUw2lLHS4*k4cS4m6bjdHRbV=R+>1fh* z;0$*4iv$k@Duyrb)Q!Y%qg>hRRDw;hQ<1pU+si4zUB&23jP3iZPw6a4$Vjy%a1%;K zufE<`N~RN!Z>Rl8y!SL&Ie(S1a677DI4WLCj>Glb)$Yc{!SpmOUzpYxklv}27Bk_* z!S=l>@tGX<^QI|mE$tItmLL~gfyH22r9JE@kFo3Js4z6lv%gAdX`k?BIwvI5jXjH) z+$+lS9Hqy&Vd3wtF9^7Clmm76lYR( z(T?Hd=MlnyNKHI1>tv zjkYP$^d`2;E@L;FYC1QyfE!_19632noWkMZO+pa|EF?s%?c2)CV-FGO6G zP9cudtm_C>)1@bQ5e5%Bt3m8Z?QOJ_dV2>JO^gJ$uCL3`5r03VrG3JCddE`GYq`MJ z4Nh-==OhOg&NqxTdFt&1J-vNCY(#G0my7kXkhc&Ct!YsXoK4ENH$1a73tX=fPWV#l z?Va$2Nd>u)X0TD3eom`Ncgd9BMiC=AHH6(NS&nF@eZqT48Itay5_} zq-FxjFB_8QO98J-B}aUTC^;ZY6DB1mhMYb^P`i}uki;&AF4dL2N0k8nF6!;tG!)d^ zZAn+&77Drh11yj?gi`eLd$L=@?TUx`Weo|LCy^Y8<3C3KL7AJOD9?8JB))`fVFGl& z`c}`>z|cs1BtDF<;lxN{1bsuj0Uq*$x??1MXAI|cn=keTN4aOQ;YdNnvPsfl@{)?h zc7L&`F6MzAC>C)Epzf!ni$!jgi$$9hRBit0474WDps12yz%}} zc2}0Q1WIwCEs#?92RL))#*q^@IC0R01E>BAocO-k)k=xeH2nb8N7{Y!X6DVCdA~=? z$ByL^_$(yG>ofU8;$PJ0e^LP6#LrvBgC{Bpu5i^b~Ey0sK^-+SS zMCveENsA0m@2T!G#%9bBD_hAb36VWaC<#7NPUN$}(u!kQy3Lts*n)Ljz10#fYd9|R zcLlTBrf;-N!F1DT+Lo|=)^eSut6OYiv$X!U_62xJdlsKI5vQC$MEaBy{Aew z&v*5@A1y%i>omImNdkBiKkqaiB2iJWCzUIeB+tk_D3ugXgU6BH#D=TedqZ^eTj@mN zzhgX?WBBa)ervH<>~uPss2Q4G*V=oV}Q>zHmL9$ReA$G&?sl7tY=k4dDvAK1^bWj??wKwsTvbW+jc0RMtXX zrrEm2=4NItXsl#I7){4D{9Q}RNeV=Gp0O_^GY^rO(_o&s)0XQph-a69L*bIZJdJI* zPF)CY*iDZ)4M~G!zphiiZn9Ec=fdiukCp3&Br?rrXXY01xNzZOp@7%IoEC2I`d4k? 
z5*FO;nod*L-GbiHUBQ}qi?vL>E=a-T`Yl)N1NNA3UB~snZoGjZRipzF`;OU`jJjQ% zm~M7G!_y>@h}mg2KQ{wj7v}psUOmt)DD{vO`-0qZLh?*!tEKW*i8Tx;|5O`vq5pzw z4MN;&nrmowQ zFgT;WCyg;tEDHsnClG_tryeVohj zAxyJ#SV=UD1LoLf_l#WrF-SBnU!tj@07BtB?o`{MbG*FewgsK)fn`cKKeThh_NRvS zA9hIuEAYz??3)#w%aKf30erE7BuXz@D3r!~PMdX{w#k{_g`joQ>@v4)OL^Dt^UP$m zLu*gv8GQ7aJ0PdxEOcXh^>mPVn14 zyoqi`2HZ=_Rq86dGJVKRpT}6a#fXI;;#a?$X^@6z0NNKx5VEV!oA5$z%BEYA>0*y6}VNt7UPoq zEq)ES@5!~_=GOt=knlRc33x;D@qNAtxXibP=56zLfV(BfzRTYOT#@h(_zvJ7O89+V z1^gok>%0cIE@93E;09ooH~B7LgWry6t9*}}z**dmaY^p*7I1gue3yHGeL26)_W^hK zK|EIFT^<1UV}5rC_Y?k8;6C6#8^V3ae-7Ls|0u>K`7ih{f%{ml^9lbH@UJBNYyKO+ z_ayuo|1IFp57lP=Zlap<)3-Bs6L*z{dRP5SK@#&jBnZC`p1x)qK2+EUL})pt?s?i5 z3RL!s6LhW8l7=fdCip$Y4^D*>nr(xFGP&9Q z@Vw$usurXRgzly28JsyX9HzG}I^@BT48hlfu+>wn;L)6c*-1% z*1R^Co7dR-^5$w3PDW=jIPA>5qOq$tmaoS>Xm4EGEUw(VQrarz7QobcsSIIo z$f$0VS92FNwn}MhIBRqk?Tym*MsfY>yW8bDDH46?O@8qy{Jbn4cN3o^_A<~xW&MwM z^ppcUPTt(!+SuNzuHM|dQQE4$Q@Xx=wd_t{_`h$hEbfrxy`9^JHT&w!%*CCOt=Ao! z>+?Ip-uHI=#_Y~@qqgHgScR5e-_x7I+kv+(%2-_-j@8BL21RMD)$L_pH+8GV^`&iM z4ZlQiUUm*;&04i;z)q@F?RC>ZsPUEngtPKWWg^U9-D;z4*D0Y2GGV}~q-ER+maaGm zi+(@lp$vnR4@x9UfquS=oK1I41BdJH>NeZgko+#pE#!ldDDYJ>VE_zP?k5O^8kpE) zQ_U8tc*lJa6VGUqZ~%ONJTbvU%2^TRL&>stz?OSwBwnG!k4i05He###`P($ z^D4Ok96XPF!bWZ-nCxRn2pfir^1y>|1HnJu8{g3roG1Ozi{$jcr02XYV8HDu4+*4d0UGj{=Q2RZ=|@O^edc|)0$k=&#Y zOYxOMRay$XZqD$iM$%CT_0kQ~fv-{ACoxc(N%$Zmb5F|2eU%;|CdGkY$%OAo}FEzbpm^ip2yk20%KCh3@sQU~taSD$|@E4#vG`Z0N&ZPl2>Q%CRDB$on$hgD$ z%YeNU*koyXWqNITYf@vP$tLG6O!vQ&BoLk5X`{+SDu?Q)ZF4x}FnX|k-E0qwL0B!r zrL^&}k6!Mmm#%eD#Xe$BAI5vi6!vf@p!d@9738my>&wRcvOXc~FgOQ$j*T!LE^l;VRSu31=1Mz07SE?@_XwB# zm-twY7d*ruQA0dH7U@-v-Pa5XW$I;NH^c2$)8ECtV_e&iSDEd*l&={#zDl|NI>aPy zlLQ|yd0#V_L|%^63?m+3l&sZ|b)$^F|JyVCTg)Em{z1$fbU$tcd14I+!)jaKBy<0W znLAd112bdp#)n%0gAVGo2L+5=UM9X&N#sx@BUPZ3yEhtx!O5*YmNHpNB!yuII0!!; zQyCj-1m_+$fwhrK!ddyuCobTeQBNT&@p~f?5WKMxVj?-lwnod^q7#+7(8#dD!9O5W zzz)jtb^HmDLaauf4vw3`r6SO!47ZnS>G(H9z`1)a{X^5s_Ny%XE0RK#X!MeGi}x~n 
zxaw|tGPQ+G(XE6lGu#&e&Ex0&9gl>1Jas~yP{&nO$t6`Kqo~PD_|6^&E~BL4XD))q zmGP9Sj3;Rh#`ov--)cms-^P1f9hWpyn1>lEQOc>HgW0qKy>nsT;7$DxuJGU(P$fEj z9&*Xx)Rz{a>o|oBcAQVskx_9}t@d)&YItc0c)VJ@)7H(XXQW!?PQ6-n>AZ%j1S+e; zgT!A#iE;cq5<~W-l67Oemfd2u4M9G~1 literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_Probcons.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0b18ca708ca2c0b681653c12afebf04b2928c41 GIT binary patch literal 4271 zcmbtYTXWpD6(+eCt!}mx+i99QDXKP!*oxH3ik(Ka)T|Ucqtuc$k`qsNH059k>@H(o zwg6U&IDT_q-T%?Q#MeIgFZ8J&z@@a7HO(ZYh6@llI5;<)^FiB-i*pv9`_@8ua?7&* ztv{-#j?R~O^Hnre*!# zn#|1+JVT{M8>_3()9m6)MN`;=`J9bcOOpLeacPJ>{g~ckDisNbJbca>p#vJF_&(u6 z!Xz7H&0o7)u07;^E_fPFGq~XLRklKQ{B3`Q?9n0Z4XGTS(F9-qk$-^EZ82skX!g+8 zUfZTB&jxWeULi+5x!Z2v^Nlo>cQ%N&7VSlGBvllW9oP*$?0@FU=@-!_>EYSbl&m!S!`Y^+9OMWU4K#A8gw&x|^-e?nEMa)!VZr;+4@@jiw0 zL~^B~bRbvASn`chrmd|l(id4mwxi598u^{kD2_s^V^oqT8D&Bl>s*Q@v!PyIML-am zv$1&<@p6;8ZE6DF23*Ui2Cs-ER{Xod@9D=osu9*)$*Hil(W?s@v5cdk_e?4 z{+f-+X*P}-QD-A4AIE1zj8kLl@QstNJy3BiHC}cJ(!fbXjXXLX9Gt0PmJ&Jcff5lc zNs_T~%;6XbZ^%$YJ%kjzr>SB2QMo{kRVE@DPvS)%A5c)3f09HH0C9}$6PbzWbfYpN z6+>98kkes=@H>M>2|Y71ol+zuO+in<5V2Oqmp&K}ASELzB!uJsz~7j}h4w|o5G@yi zY`&$pVXi4nTxKJlE-zIPOXGBDg)E&exmxKV_KFd6nz(w(G2dvjT<%pmO%qfxSp}d8 z&D|&G49qqVX)^8$PT9rg=D6tXE3YrPrJp;o;vz+CpYZr>MccyYZu~GgQ%08wTf!p2 zLxo__<_u|S_Q&ZVM@rx*?LN^g)L4|BMC!&M{Az4G(fMZ?)Tke2mG;$c=j#6~^Hf|a zV_#vPqE*Fq=%B#pM^~J8@%{y`L_xHasj35q)m}QvW~eu)Ju`zkYq422hnhOi7GBjc zv&gQo>+A+*-eK>)F>{mMVz=3QnE3^Je`>~JzhobP^RK|!Vt3ew=znDTzh(se-zL3+sj5cV0OEw1Eb;P+R|;-`-6lg@YR9`hap0 z##Bl_pFPhq-BuNOx%(}`W#{p?Nb8TEkx7u1i5iiD8bcY$`eg4>CwIKCu)}=O^K`DA z)SqI0>LdS3b8C|ao8<7}!I5t7k>yE(-ynBg#IuxKYK~LO2rxw|%T1BiLwM0Vy49HJ z2(P?`pBEGFzI2{9p=oVjXp+}J4VG&c4R9H7bnGAgWr;-${-?XMaSV}!JpMjP*1l-B zKR@oI6k9X8e$3Mod938+YWwmMyIR)d2IPK?fq)`OTP zJw~@Qnv-~~+^^=RK#d;+5!?|3{)0F}>XuvRlnWoZPWMI$Hbl@Lr=bB}O;m8IcwX}& 
zk-z785KNeq;0fLb;L=Dezjf7U*TIUNw_LE+ZnvkrfX`_^T*c_8dLVj@W9Zsq9!5a3 zfX3Lr{L!8h*YR_Jw=`7U8ws9Y^K|3E`>;?t$QP!+ikc?5|CTm)r*(lQA{(d7L=@;N zRzFpN*S8_sk1O!qxdKhZN+e{_Firjm;Fb20u6c1eg;0-5fP;frqHdYDyc2EjUeukL zbnm2wm$@f2*5-t_ByUGwb)=yy?+2%~a7aO}_rj6m#=oAeXsQTydX(xQ})pKiu!^ zJ%kx=z)naPs<2f)P=jj-QgNLU zG(}CDgp7+_cwzH=-t%}8Ne$CwclNxS-|NIOBZ70_X4s9BYU<4mr??w4INqqRN17`N zFikYx?arQe^A8VoS~TAr?vy1Ss6>TKG=InQ&i(G;-oZtF{n9{T(|q>;HCbcd614-- zrc<{xvyLFHfR#1WN-?d0bvgc~>xzb%%eQt7iGk=TO}LM{0q!upscT>f;?Jca%gFAo z_AD&))OCuMUaZ%4=OY;i$OfFj_T8q?)mG@zD|B8i<}OSi8hW5rB|EoP)FqGmMJ-HN z(R_YN#Xw%p2_Ip}1HAHY__11!J?G%H9lL2auk0hx?|Tt zyFSPnt6g;7X&C$^8oOg4ep?7-<=a5gP?oe+;c8&IHj$P)ioz$WW!1< qY4SNu9*k8KOYy0mHxgX@XjxFFN;S5$hEo4`qVBw3yOFo-1?Rum9N(4z literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/_TCoffee.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2f271d92ebef03808007151173455de7035b56d GIT binary patch literal 3789 zcmbtXTW=f372aJ^Bt^-!<4e*eO*>K3N=!*gmQe>0Y{8NuwP{n^jo1_>1T2<2!{u6g z>+I092*24t{o?+Q{)zsOedtSH8lb=v8;d6r26U@+(XNc&~a;Mahp4l9XmrOt_^E(eOR}xUt7G!>n|)`7xm}0;gV?Z zrBg@l<7^{p#;sw?wnXc>Wm~*?Y;{`mdk1M8dkJSzmW`lzVM@NT;{cj+R)u|Ot_d%w@G{N9_7Ak*IT-zJvN_xI3ub>-z zI$eD=4DO-jAEOi2(1unHw}&U!J9Aa;Sz5_-ga?yaFCA2LM-s~{tnYD z%vm~mEPQpYL`m_s4*hnU;hQLx?QCw&X0yKXHS>NdgUyFBB{sP`Nn_z=ULZD=YxXae z`V$pLhRp!>8uW~G%wM#sO4Eo@1yXF0*tRkxNenQz@H>C}ZFy()53^kSc}grUd{vqSf=R)>SajKM!XSVQ7zi0A zf!Aff{r!IT*4FmM_SUa@+c!V$u-k%)J@=S_GmlBiHpc zj_-cU(Ty-68Sk#Wq2gLsGwJvH1cvYXj1}&#`Njsb|C!pzuwbaQ&bbm99I7^6Q^jmn zz+=7A3)S_kXS8B(D5Vd+=@Mh>N9mLqe5kVw7LKAbCa1sw@cSBUjps|mYrAfA7!u=? z(9;qV!NW5Ial-QH2hM`Og>*A>s@v}r*Ys1^b_Y} zv->u67RriH=a*`dP0pe)BVsy%G)O~pf+lY<)^Kgau9J z&UgEi{2WQaS+VugYHuLbTK$*hBgOVxiPdC5`L_|eqzSJWp7Io_9 zpH#FI2(+RLYE?K!z)xT@yWqYJA?TxDLeDj{oSs$HLBvhTx|%g2X*a0ibb z9v)+2KSfv1MKu1K^`A!vJ12k4A;`(&FuwWO*4C#ddx;0(xOeMBBv0}aHNJWBWjH#? 
zQJH(0=bz%Kke>kSn>v$kE@tvgx6I-F?5t?riM)8kz1;zcg=WorZ>RXa>xPJa*X`el zQlu+bxT5Pg!z-pTOLsg?d@2^k=F8;DRB|DQt!gyS7A>bYUo#zCU_5+?l848|x(*!* z#4=m2Q}(-41<)=LA5oDs{?h5P3Im+P-3U$8Mj1XGHq7t*cZ(uaoQM%Q6InD@AyOmM zV1|I=0%fv)5pJU>&8PA0?N}jR120Q6kqn#VV7~uF!l`QFX+<2d=S2#TreX?o)4P@k zbc`?%v5iCoD6;f0N|XqM?7S8mrLsKvSd4PH bhJ|kv1R2aIvP(K@jMQt_Yggiyz3TiIp?g4% literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/Applications/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..591230009e9fd2ef2ebdc71aae07a05f4ffdcb86 GIT binary patch literal 915 zcmZ8fU5^qm6dhpr8enB%6JPr%iNbD7G#X#qn)M`b6J#T_$@)!oeA9p#Nq9vc~uYXz~0Srii6nQ}u zVX@$NiI+qOL(fBA78O|Wyv(bj25X*IcwIy=^1R9$q6wRx*LX{u!86b6{F!LOw&xK) z7aiEKyaBtUISYENU%!W(Ev2B+fJ6y_B>|jC3XD=5JY+acX{N!8>1Z~+xxTyZ^`qz; z1^0MO0ig*al-N*^QK772?wJNck(^V&Bv|Ao&*)3wk}|0c#l%LsGE|aio-u~p` zeQsDm-mR};FW#oF1#4*U&sVj{4#s=~_IdBfw_=|UR(w~kCCoWOk3rUQtvdMkef&QD zs_jC>KbW|BcX|0X9!hM7MeysG%C(No;wrvj^H?)OU!^!%;U(4ah^c|!)4?!JIZLo% hcEAIKm}CZ_e!AJ#KJ51HW5X8G7Z$=|vvBf-{{fpm2G#%o literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/__init__.py b/code/lib/Bio/Align/__init__.py new file mode 100644 index 0000000..ac5b1cd --- /dev/null +++ b/code/lib/Bio/Align/__init__.py @@ -0,0 +1,2326 @@ +# Copyright 2000, 2004 by Brad Chapman. +# Revisions copyright 2010-2013, 2015-2018 by Peter Cock. +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code for dealing with sequence alignments. + +One of the most important things in this module is the MultipleSeqAlignment +class, used in the Bio.AlignIO module. 
+ +""" + +import sys + +from Bio.Align import _aligners +from Bio.Align import substitution_matrices +from Bio.Seq import Seq, MutableSeq, reverse_complement, UndefinedSequenceError +from Bio.SeqRecord import SeqRecord, _RestrictedDict + +# Import errors may occur here if a compiled aligners.c file +# (_aligners.pyd or _aligners.so) is missing or if the user is +# importing from within the Biopython source tree, see PR #2007: +# https://github.com/biopython/biopython/pull/2007 + + +class MultipleSeqAlignment: + """Represents a classical multiple sequence alignment (MSA). + + By this we mean a collection of sequences (usually shown as rows) which + are all the same length (usually with gap characters for insertions or + padding). The data can then be regarded as a matrix of letters, with well + defined columns. + + You would typically create an MSA by loading an alignment file with the + AlignIO module: + + >>> from Bio import AlignIO + >>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal") + >>> print(align) + Alignment with 7 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273291|gb|AF191665.1|AF191 + + In some respects you can treat these objects as lists of SeqRecord objects, + each representing a row of the alignment. Iterating over an alignment gives + the SeqRecord object for each row: + + >>> len(align) + 7 + >>> for record in align: + ... print("%s %i" % (record.id, len(record))) + ... 
+ gi|6273285|gb|AF191659.1|AF191 156 + gi|6273284|gb|AF191658.1|AF191 156 + gi|6273287|gb|AF191661.1|AF191 156 + gi|6273286|gb|AF191660.1|AF191 156 + gi|6273290|gb|AF191664.1|AF191 156 + gi|6273289|gb|AF191663.1|AF191 156 + gi|6273291|gb|AF191665.1|AF191 156 + + You can also access individual rows as SeqRecord objects via their index: + + >>> print(align[0].id) + gi|6273285|gb|AF191659.1|AF191 + >>> print(align[-1].id) + gi|6273291|gb|AF191665.1|AF191 + + And extract columns as strings: + + >>> print(align[:, 1]) + AAAAAAA + + Or, take just the first ten columns as a sub-alignment: + + >>> print(align[:, :10]) + Alignment with 7 rows and 10 columns + TATACATTAA gi|6273285|gb|AF191659.1|AF191 + TATACATTAA gi|6273284|gb|AF191658.1|AF191 + TATACATTAA gi|6273287|gb|AF191661.1|AF191 + TATACATAAA gi|6273286|gb|AF191660.1|AF191 + TATACATTAA gi|6273290|gb|AF191664.1|AF191 + TATACATTAA gi|6273289|gb|AF191663.1|AF191 + TATACATTAA gi|6273291|gb|AF191665.1|AF191 + + Combining this alignment slicing with alignment addition allows you to + remove a section of the alignment. For example, taking just the first + and last ten columns: + + >>> print(align[:, :10] + align[:, -10:]) + Alignment with 7 rows and 20 columns + TATACATTAAGTGTACCAGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAGTGTACCAGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAGTGTACCAGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAGTGTACCAGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAGTGTACCAGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAGTATACCAGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAGTGTACCAGA gi|6273291|gb|AF191665.1|AF191 + + Note - This object replaced the older Alignment object defined in module + Bio.Align.Generic but is not fully backwards compatible with it. 
+ + Note - This object does NOT attempt to model the kind of alignments used + in next generation sequencing with multiple sequencing reads which are + much shorter than the alignment, and where there is usually a consensus or + reference sequence with special status. + """ + + def __init__( + self, records, alphabet=None, annotations=None, column_annotations=None + ): + """Initialize a new MultipleSeqAlignment object. + + Arguments: + - records - A list (or iterator) of SeqRecord objects, whose + sequences are all the same length. This may be an be an + empty list. + - alphabet - For backward compatibility only; its value should always + be None. + - annotations - Information about the whole alignment (dictionary). + - column_annotations - Per column annotation (restricted dictionary). + This holds Python sequences (lists, strings, tuples) + whose length matches the number of columns. A typical + use would be a secondary structure consensus string. + + You would normally load a MSA from a file using Bio.AlignIO, but you + can do this from a list of SeqRecord objects too: + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("AAAACGT"), id="Alpha") + >>> b = SeqRecord(Seq("AAA-CGT"), id="Beta") + >>> c = SeqRecord(Seq("AAAAGGT"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c], + ... annotations={"tool": "demo"}, + ... 
column_annotations={"stats": "CCCXCCC"}) + >>> print(align) + Alignment with 3 rows and 7 columns + AAAACGT Alpha + AAA-CGT Beta + AAAAGGT Gamma + >>> align.annotations + {'tool': 'demo'} + >>> align.column_annotations + {'stats': 'CCCXCCC'} + """ + if alphabet is not None: + raise ValueError("The alphabet argument is no longer supported") + + self._records = [] + if records: + self.extend(records) + + # Annotations about the whole alignment + if annotations is None: + annotations = {} + elif not isinstance(annotations, dict): + raise TypeError("annotations argument should be a dict") + self.annotations = annotations + + # Annotations about each column of the alignment + if column_annotations is None: + column_annotations = {} + # Handle this via the property set function which will validate it + self.column_annotations = column_annotations + + def _set_per_column_annotations(self, value): + if not isinstance(value, dict): + raise TypeError( + "The per-column-annotations should be a (restricted) dictionary." + ) + # Turn this into a restricted-dictionary (and check the entries) + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + self._per_col_annotations = _RestrictedDict(length=expected_length) + self._per_col_annotations.update(value) + else: + # Bit of a problem case... number of columns is undefined + self._per_col_annotations = None + if value: + raise ValueError( + "Can't set per-column-annotations without an alignment" + ) + + def _get_per_column_annotations(self): + if self._per_col_annotations is None: + # This happens if empty at initialisation + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + else: + # Should this raise an exception? Compare SeqRecord behaviour... 
+ expected_length = 0 + self._per_col_annotations = _RestrictedDict(length=expected_length) + return self._per_col_annotations + + column_annotations = property( + fget=_get_per_column_annotations, + fset=_set_per_column_annotations, + doc="""Dictionary of per-letter-annotation for the sequence.""", + ) + + def _str_line(self, record, length=50): + """Return a truncated string representation of a SeqRecord (PRIVATE). + + This is a PRIVATE function used by the __str__ method. + """ + if record.seq.__class__.__name__ == "CodonSeq": + if len(record.seq) <= length: + return "%s %s" % (record.seq, record.id) + else: + return "%s...%s %s" % ( + record.seq[: length - 3], + record.seq[-3:], + record.id, + ) + else: + if len(record.seq) <= length: + return "%s %s" % (record.seq, record.id) + else: + return "%s...%s %s" % ( + record.seq[: length - 6], + record.seq[-3:], + record.id, + ) + + def __str__(self): + """Return a multi-line string summary of the alignment. + + This output is intended to be readable, but large alignments are + shown truncated. A maximum of 20 rows (sequences) and 50 columns + are shown, with the record identifiers. This should fit nicely on a + single screen. e.g. + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha") + >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta") + >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma") + >>> align = MultipleSeqAlignment([a, b, c]) + >>> print(align) + Alignment with 3 rows and 12 columns + ACTGCTAGCTAG Alpha + ACT-CTAGCTAG Beta + ACTGCTAGATAG Gamma + + See also the alignment's format method. 
+ """ + rows = len(self._records) + lines = [ + "Alignment with %i rows and %i columns" + % (rows, self.get_alignment_length()) + ] + if rows <= 20: + lines.extend(self._str_line(rec) for rec in self._records) + else: + lines.extend(self._str_line(rec) for rec in self._records[:18]) + lines.append("...") + lines.append(self._str_line(self._records[-1])) + return "\n".join(lines) + + def __repr__(self): + """Return a representation of the object for debugging. + + The representation cannot be used with eval() to recreate the object, + which is usually possible with simple python objects. For example: + + + + The hex string is the memory address of the object, see help(id). + This provides a simple way to visually distinguish alignments of + the same size. + """ + # A doctest for __repr__ would be nice, but __class__ comes out differently + # if run via the __main__ trick. + return "<%s instance (%i records of length %i) at %x>" % ( + self.__class__, + len(self._records), + self.get_alignment_length(), + id(self), + ) + # This version is useful for doing eval(repr(alignment)), + # but it can be VERY long: + # return "%s(%r)" \ + # % (self.__class__, self._records) + + def __format__(self, format_spec): + """Return the alignment as a string in the specified file format. + + The format should be a lower case string supported as an output + format by Bio.AlignIO (such as "fasta", "clustal", "phylip", + "stockholm", etc), which is used to turn the alignment into a + string. + + e.g. 
def get_alignment_length(self):
    """Return the maximum length of the alignment.

    All rows of an alignment should (hopefully) have the same length, so
    this is normally the number of columns. It is computed as the length
    of the longest sequence among the rows; an alignment without any rows
    has length zero.

    >>> from Bio.Align import MultipleSeqAlignment
    >>> a = SeqRecord(Seq("ACTGCTAGCTAG"), id="Alpha")
    >>> b = SeqRecord(Seq("ACT-CTAGCTAG"), id="Beta")
    >>> c = SeqRecord(Seq("ACTGCTAGATAG"), id="Gamma")
    >>> align = MultipleSeqAlignment([a, b, c])
    >>> align.get_alignment_length()
    12

    If you want to know the number of sequences in the alignment,
    use len(align) instead:

    >>> len(align)
    3

    """
    return max((len(row.seq) for row in self._records), default=0)
+ """ + if len(self): + # Use the standard method to get the length + expected_length = self.get_alignment_length() + else: + # Take the first record's length + records = iter(records) # records arg could be list or iterator + try: + rec = next(records) + except StopIteration: + # Special case, no records + return + expected_length = len(rec) + self._append(rec, expected_length) + # Can now setup the per-column-annotations as well, set to None + # while missing the length: + self.column_annotations = {} + # Now continue to the rest of the records as usual + + for rec in records: + self._append(rec, expected_length) + + def append(self, record): + """Add one more SeqRecord object to the alignment as a new row. + + This must have the same length as the original alignment (unless this is + the first record). + + >>> from Bio import AlignIO + >>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal") + >>> print(align) + Alignment with 7 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191 + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273287|gb|AF191661.1|AF191 + TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273286|gb|AF191660.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273290|gb|AF191664.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273289|gb|AF191663.1|AF191 + TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273291|gb|AF191665.1|AF191 + >>> len(align) + 7 + + We'll now construct a dummy record to append as an example: + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> dummy = SeqRecord(Seq("N"*156), id="dummy") + + Now append this to the alignment, + + >>> align.append(dummy) + >>> print(align) + Alignment with 8 rows and 156 columns + TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191 + 
def _append(self, record, expected_length=None):
    """Validate a record and add it as the last row (PRIVATE).

    Arguments:
     - record - the new row; must be a SeqRecord.
     - expected_length - required length of the record, or None to skip
       the length check (used for the first row of an alignment).

    Raises TypeError if record is not a SeqRecord, and ValueError if
    expected_length is given and differs from len(record).
    """
    # Currently the get_alignment_length() call is expensive, so callers
    # such as __init__ and extend compute the length once and pass it in
    # here rather than having it recomputed for every row.
    if isinstance(record, SeqRecord):
        if expected_length is None or len(record) == expected_length:
            self._records.append(record)
            return
        # TODO - Use the following more helpful error, but update unit tests
        # raise ValueError("New sequence is not of length %i"
        #                  % self.get_alignment_length())
        raise ValueError("Sequences must all be the same length")
    raise TypeError("New sequence is not a SeqRecord object")
For example, + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a1 = SeqRecord(Seq("AAAAC"), id="Alpha") + >>> b1 = SeqRecord(Seq("AAA-C"), id="Beta") + >>> c1 = SeqRecord(Seq("AAAAG"), id="Gamma") + >>> a2 = SeqRecord(Seq("GT"), id="Alpha") + >>> b2 = SeqRecord(Seq("GT"), id="Beta") + >>> c2 = SeqRecord(Seq("GT"), id="Gamma") + >>> left = MultipleSeqAlignment([a1, b1, c1], + ... annotations={"tool": "demo", "name": "start"}, + ... column_annotations={"stats": "CCCXC"}) + >>> right = MultipleSeqAlignment([a2, b2, c2], + ... annotations={"tool": "demo", "name": "end"}, + ... column_annotations={"stats": "CC"}) + + Now, let's look at these two alignments: + + >>> print(left) + Alignment with 3 rows and 5 columns + AAAAC Alpha + AAA-C Beta + AAAAG Gamma + >>> print(right) + Alignment with 3 rows and 2 columns + GT Alpha + GT Beta + GT Gamma + + And add them: + + >>> combined = left + right + >>> print(combined) + Alignment with 3 rows and 7 columns + AAAACGT Alpha + AAA-CGT Beta + AAAAGGT Gamma + + For this to work, both alignments must have the same number of records (here + they both have 3 rows): + + >>> len(left) + 3 + >>> len(right) + 3 + >>> len(combined) + 3 + + The individual rows are SeqRecord objects, and these can be added together. Refer + to the SeqRecord documentation for details of how the annotation is handled. This + example is a special case in that both original alignments shared the same names, + meaning when the rows are added they also get the same name. + + Any common annotations are preserved, but differing annotation is lost. 
This is + the same behaviour used in the SeqRecord annotations and is designed to prevent + accidental propagation of inappropriate values: + + >>> combined.annotations + {'tool': 'demo'} + + Similarly any common per-column-annotations are combined: + + >>> combined.column_annotations + {'stats': 'CCCXCCC'} + + """ + if not isinstance(other, MultipleSeqAlignment): + raise NotImplementedError + if len(self) != len(other): + raise ValueError( + "When adding two alignments they must have the same length" + " (i.e. same number or rows)" + ) + merged = (left + right for left, right in zip(self, other)) + # Take any common annotation: + annotations = {} + for k, v in self.annotations.items(): + if k in other.annotations and other.annotations[k] == v: + annotations[k] = v + column_annotations = {} + for k, v in self.column_annotations.items(): + if k in other.column_annotations: + column_annotations[k] = v + other.column_annotations[k] + return MultipleSeqAlignment( + merged, annotations=annotations, column_annotations=column_annotations + ) + + def __getitem__(self, index): + """Access part of the alignment. + + Depending on the indices, you can get a SeqRecord object + (representing a single row), a Seq object (for a single columns), + a string (for a single characters) or another alignment + (representing some part or all of the alignment). + + align[r,c] gives a single character as a string + align[r] gives a row as a SeqRecord + align[r,:] gives a row as a SeqRecord + align[:,c] gives a column as a Seq + + align[:] and align[:,:] give a copy of the alignment + + Anything else gives a sub alignment, e.g. 
+ align[0:2] or align[0:2,:] uses only row 0 and 1 + align[:,1:3] uses only columns 1 and 2 + align[0:2,1:3] uses only rows 0 & 1 and only cols 1 & 2 + + We'll use the following example alignment here for illustration: + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> a = SeqRecord(Seq("AAAACGT"), id="Alpha") + >>> b = SeqRecord(Seq("AAA-CGT"), id="Beta") + >>> c = SeqRecord(Seq("AAAAGGT"), id="Gamma") + >>> d = SeqRecord(Seq("AAAACGT"), id="Delta") + >>> e = SeqRecord(Seq("AAA-GGT"), id="Epsilon") + >>> align = MultipleSeqAlignment([a, b, c, d, e]) + + You can access a row of the alignment as a SeqRecord using an integer + index (think of the alignment as a list of SeqRecord objects here): + + >>> first_record = align[0] + >>> print("%s %s" % (first_record.id, first_record.seq)) + Alpha AAAACGT + >>> last_record = align[-1] + >>> print("%s %s" % (last_record.id, last_record.seq)) + Epsilon AAA-GGT + + You can also access use python's slice notation to create a sub-alignment + containing only some of the SeqRecord objects: + + >>> sub_alignment = align[2:5] + >>> print(sub_alignment) + Alignment with 3 rows and 7 columns + AAAAGGT Gamma + AAAACGT Delta + AAA-GGT Epsilon + + This includes support for a step, i.e. align[start:end:step], which + can be used to select every second sequence: + + >>> sub_alignment = align[::2] + >>> print(sub_alignment) + Alignment with 3 rows and 7 columns + AAAACGT Alpha + AAAAGGT Gamma + AAA-GGT Epsilon + + Or to get a copy of the alignment with the rows in reverse order: + + >>> rev_alignment = align[::-1] + >>> print(rev_alignment) + Alignment with 5 rows and 7 columns + AAA-GGT Epsilon + AAAACGT Delta + AAAAGGT Gamma + AAA-CGT Beta + AAAACGT Alpha + + You can also use two indices to specify both rows and columns. Using simple + integers gives you the entry as a single character string. e.g. 
+ + >>> align[3, 4] + 'C' + + This is equivalent to: + + >>> align[3][4] + 'C' + + or: + + >>> align[3].seq[4] + 'C' + + To get a single column (as a string) use this syntax: + + >>> align[:, 4] + 'CCGCG' + + Or, to get part of a column, + + >>> align[1:3, 4] + 'CG' + + However, in general you get a sub-alignment, + + >>> print(align[1:5, 3:6]) + Alignment with 4 rows and 3 columns + -CG Beta + AGG Gamma + ACG Delta + -GG Epsilon + + This should all seem familiar to anyone who has used the NumPy + array or matrix objects. + """ + if isinstance(index, int): + # e.g. result = align[x] + # Return a SeqRecord + return self._records[index] + elif isinstance(index, slice): + # e.g. sub_align = align[i:j:k] + new = MultipleSeqAlignment(self._records[index]) + if self.column_annotations and len(new) == len(self): + # All rows kept (although could have been reversed) + # Preserve the column annotations too, + for k, v in self.column_annotations.items(): + new.column_annotations[k] = v + return new + elif len(index) != 2: + raise TypeError("Invalid index type.") + + # Handle double indexing + row_index, col_index = index + if isinstance(row_index, int): + # e.g. row_or_part_row = align[6, 1:4], gives a SeqRecord + return self._records[row_index][col_index] + elif isinstance(col_index, int): + # e.g. col_or_part_col = align[1:5, 6], gives a string + return "".join(rec[col_index] for rec in self._records[row_index]) + else: + # e.g. sub_align = align[1:4, 5:7], gives another alignment + new = MultipleSeqAlignment( + rec[col_index] for rec in self._records[row_index] + ) + if self.column_annotations and len(new) == len(self): + # All rows kept (although could have been reversed) + # Preserve the column annotations too, + for k, v in self.column_annotations.items(): + new.column_annotations[k] = v[col_index] + return new + + def sort(self, key=None, reverse=False): + """Sort the rows (SeqRecord objects) of the alignment in place. 
+ + This sorts the rows alphabetically using the SeqRecord object id by + default. The sorting can be controlled by supplying a key function + which must map each SeqRecord to a sort value. + + This is useful if you want to add two alignments which use the same + record identifiers, but in a different order. For example, + + >>> from Bio.Seq import Seq + >>> from Bio.SeqRecord import SeqRecord + >>> from Bio.Align import MultipleSeqAlignment + >>> align1 = MultipleSeqAlignment([ + ... SeqRecord(Seq("ACGT"), id="Human"), + ... SeqRecord(Seq("ACGG"), id="Mouse"), + ... SeqRecord(Seq("ACGC"), id="Chicken"), + ... ]) + >>> align2 = MultipleSeqAlignment([ + ... SeqRecord(Seq("CGGT"), id="Mouse"), + ... SeqRecord(Seq("CGTT"), id="Human"), + ... SeqRecord(Seq("CGCT"), id="Chicken"), + ... ]) + + If you simple try and add these without sorting, you get this: + + >>> print(align1 + align2) + Alignment with 3 rows and 8 columns + ACGTCGGT + ACGGCGTT + ACGCCGCT Chicken + + Consult the SeqRecord documentation which explains why you get a + default value when annotation like the identifier doesn't match up. + However, if we sort the alignments first, then add them we get the + desired result: + + >>> align1.sort() + >>> align2.sort() + >>> print(align1 + align2) + Alignment with 3 rows and 8 columns + ACGCCGCT Chicken + ACGTCGTT Human + ACGGCGGT Mouse + + As an example using a different sort order, you could sort on the + GC content of each sequence. 
@property
def substitutions(self):
    """Return an Array with the number of substitutions of letters in the alignment.

    As an example, consider a multiple sequence alignment of four DNA sequences:

    >>> from Bio.Seq import Seq
    >>> from Bio.SeqRecord import SeqRecord
    >>> from Bio.Align import MultipleSeqAlignment
    >>> seq1 = SeqRecord(Seq("ACGT"), id="seq1")
    >>> seq2 = SeqRecord(Seq("A--A"), id="seq2")
    >>> seq3 = SeqRecord(Seq("ACGT"), id="seq3")
    >>> seq4 = SeqRecord(Seq("TTTC"), id="seq4")
    >>> alignment = MultipleSeqAlignment([seq1, seq2, seq3, seq4])
    >>> print(alignment)
    Alignment with 4 rows and 4 columns
    ACGT seq1
    A--A seq2
    ACGT seq3
    TTTC seq4

    >>> m = alignment.substitutions
    >>> print(m)
        A   C   G   T
    A 3.0 0.5 0.0 2.5
    C 0.5 1.0 0.0 2.0
    G 0.0 0.0 1.0 1.0
    T 2.5 2.0 1.0 1.0
    <BLANKLINE>

    Note that the matrix is symmetric, with counts divided equally on both
    sides of the diagonal. For example, the total number of substitutions
    between A and T in the alignment is 2.5 + 2.5 = 5.

    Any weights associated with the sequences are taken into account when
    calculating the substitution matrix. The weight of a row is read from
    its ``annotations["weight"]`` entry and defaults to 1.0. For example,
    given the following multiple sequence alignment::

        GTATC  0.5
        AT--C  0.8
        CTGTC  1.0

    For the first column we have::

        ('A', 'G') : 0.5 * 0.8 = 0.4
        ('C', 'G') : 0.5 * 1.0 = 0.5
        ('A', 'C') : 0.8 * 1.0 = 0.8

    Columns in which either row of a pair has a gap ("-") do not
    contribute to the count for that pair.
    """
    # The alphabet of the matrix is every letter seen in any row, minus
    # the gap character.
    letters = set.union(*[set(record.seq) for record in self])
    letters.discard("-")
    letters = "".join(sorted(letters))
    m = substitution_matrices.Array(letters, dims=2)
    for rec_num1, alignment1 in enumerate(self):
        seq1 = alignment1.seq
        weight1 = alignment1.annotations.get("weight", 1.0)
        for rec_num2, alignment2 in enumerate(self):
            if rec_num1 == rec_num2:
                # Only pair each row with the rows before it, so every
                # unordered pair of rows is counted exactly once.
                break
            seq2 = alignment2.seq
            weight2 = alignment2.annotations.get("weight", 1.0)
            for residue1, residue2 in zip(seq1, seq2):
                if residue1 == "-" or residue2 == "-":
                    continue
                m[(residue1, residue2)] += weight1 * weight2

    # Counts were accumulated for one ordering of each row pair only;
    # symmetrise by splitting them equally across the diagonal.
    m += m.transpose()
    m /= 2.0

    return m
+ """ + self.target = target + self.query = query + self.score = score + self.path = path + + def __eq__(self, other): + return self.path == other.path + + def __ne__(self, other): + return self.path != other.path + + def __lt__(self, other): + return self.path < other.path + + def __le__(self, other): + return self.path <= other.path + + def __gt__(self, other): + return self.path > other.path + + def __ge__(self, other): + return self.path >= other.path + + def __getitem__(self, key): + """Return self[key]. + + Currently, this is implemented only for indices of the form + + self[:, :] + + which returns a copy of the PairwiseAlignment object, and + + self[:, i:] + self[:, :j] + self[:, i:j] + + which returns a new PairwiseAlignment object spanning the indicated + columns. + + >>> from Bio.Align import PairwiseAligner + >>> aligner = PairwiseAligner() + >>> alignments = aligner.align("ACCGGTTT", "ACGGGTT") + >>> alignment = alignments[0] + >>> print(alignment) + ACCGG-TTT + ||-||-||- + AC-GGGTT- + + >>> alignment[:, 1:] # doctest:+ELLIPSIS + + >>> print(alignment[:, 1:]) + ACCGG-TTT + |-||-||- + AC-GGGTT- + + >>> print(alignment[:, 2:]) + ACCGG-TTT + -||-||- + AC-GGGTT- + + >>> print(alignment[:, 3:]) + ACCGG-TTT + ||-||- + ACGGGTT- + + >>> print(alignment[:, 3:-1]) + ACCGG-TTT + ||-|| + ACGGGTT + + """ + if isinstance(key, slice): + if key.indices(len(self)) == (0, 2, 1): + target = self.target + query = self.query + path = self.path + score = self.score + return PairwiseAlignment(target, query, path, score) + raise NotImplementedError + if isinstance(key, int): + raise NotImplementedError + if isinstance(key, tuple): + try: + row, col = key + except ValueError: + raise ValueError("only tuples of length 2 can be alignment indices") + if isinstance(row, int): + raise NotImplementedError + if isinstance(row, slice): + if row.indices(len(self)) != (0, 2, 1): + raise NotImplementedError + if isinstance(col, int): + raise NotImplementedError + if isinstance(col, 
def format(self, fmt="", **kwargs):
    """Return the alignment as a string in the specified file format.

    Arguments:
     - fmt      - File format, matched case-insensitively (so both "psl"
                  and "PSL" are accepted). Acceptable values are
                  ""   : create a human-readable representation of the
                         alignment (default);
                  "BED": create a line representing the alignment in
                         the Browser Extensible Data (BED) file format;
                  "PSL": create a line representing the alignment in
                         the Pattern Space Layout (PSL) file format as
                         generated by BLAT;
                  "SAM": create a line representing the alignment in
                         the Sequence Alignment/Map (SAM) format.
     - mask     - PSL format only. Specify if repeat regions in the target
                  sequence are masked and should be reported in the
                  `repMatches` field of the PSL file instead of in the
                  `matches` field. Acceptable values are
                  None or False: no masking (default);
                  "lower": masking by lower-case characters;
                  "upper": masking by upper-case characters.
     - wildcard - PSL format only. Report alignments to the wildcard
                  character in the target or query sequence in the
                  `nCount` field of the PSL file instead of in the
                  `matches`, `misMatches`, or `repMatches` fields.
                  Default value is 'N'.

    Any keyword arguments are passed on unchanged to the formatter that
    is selected by fmt.

    Raises ValueError if fmt is not one of the formats listed above.
    """
    # The documented spellings are upper case while earlier code only
    # accepted lower case; normalising the case accepts both. Values that
    # are not strings are left alone and fall through to the ValueError.
    key = fmt.lower() if isinstance(fmt, str) else fmt
    if key == "":
        return self._format_pretty(**kwargs)
    elif key == "psl":
        return self._format_psl(**kwargs)
    elif key == "bed":
        return self._format_bed(**kwargs)
    elif key == "sam":
        return self._format_sam(**kwargs)
    else:
        raise ValueError("Unknown format %s" % fmt)
start2 = end2 + for end1, end2 in path[1:]: + if end1 == start1: + gap = end2 - start2 + aligned_seq1 += "-" * gap + aligned_seq2 += seq2[start2:end2] + pattern += "-" * gap + elif end2 == start2: + gap = end1 - start1 + aligned_seq1 += seq1[start1:end1] + aligned_seq2 += "-" * gap + pattern += "-" * gap + else: + s1 = seq1[start1:end1] + s2 = seq2[start2:end2] + aligned_seq1 += s1 + aligned_seq2 += s2 + for c1, c2 in zip(s1, s2): + if c1 == c2: + pattern += "|" + else: + pattern += "." + start1 = end1 + start2 = end2 + aligned_seq1 += seq1[end1:] + aligned_seq2 += seq2[end2:] + return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2) + + def _format_generalized(self): + seq1 = self.target + seq2 = self.query + aligned_seq1 = [] + aligned_seq2 = [] + pattern = [] + path = self.path + end1, end2 = path[0] + if end1 > 0 or end2 > 0: + if end1 <= end2: + for c2 in seq2[: end2 - end1]: + s2 = str(c2) + s1 = " " * len(s2) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s1) + else: # end1 > end2 + for c1 in seq1[: end1 - end2]: + s1 = str(c1) + s2 = " " * len(s1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s2) + start1 = end1 + start2 = end2 + for end1, end2 in path[1:]: + if end1 == start1: + for c2 in seq2[start2:end2]: + s2 = str(c2) + s1 = "-" * len(s2) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s1) + start2 = end2 + elif end2 == start2: + for c1 in seq1[start1:end1]: + s1 = str(c1) + s2 = "-" * len(s1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + pattern.append(s2) + start1 = end1 + else: + for c1, c2 in zip(seq1[start1:end1], seq2[start2:end2]): + s1 = str(c1) + s2 = str(c2) + m1 = len(s1) + m2 = len(s2) + if c1 == c2: + p = "|" + else: + p = "." 
+ if m1 < m2: + space = (m2 - m1) * " " + s1 += space + pattern.append(p * m1 + space) + elif m1 > m2: + space = (m1 - m2) * " " + s2 += space + pattern.append(p * m2 + space) + else: + pattern.append(p * m1) + aligned_seq1.append(s1) + aligned_seq2.append(s2) + start1 = end1 + start2 = end2 + aligned_seq1 = " ".join(aligned_seq1) + aligned_seq2 = " ".join(aligned_seq2) + pattern = " ".join(pattern) + return "%s\n%s\n%s\n" % (aligned_seq1, pattern, aligned_seq2) + + def _format_bed(self): + query = self.query + target = self.target + # variable names follow those in the BED file format specification + try: + chrom = target.id + except AttributeError: + chrom = "target" + try: + name = query.id + except AttributeError: + name = "query" + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + strand = "+" + else: # mapped to reverse strand + strand = "-" + n2 = len(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + score = self.score + blockSizes = [] + tStarts = [] + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + qStart = qEnd + elif qCount == 0: + tStart = tEnd + else: + assert tCount == qCount + tStarts.append(tStart) + blockSizes.append(tCount) + tStart = tEnd + qStart = qEnd + chromStart = tStarts[0] + chromEnd = tStarts[-1] + blockSizes[-1] + blockStarts = [tStart - chromStart for tStart in tStarts] + blockCount = len(blockSizes) + blockSizes = ",".join(map(str, blockSizes)) + "," + blockStarts = ",".join(map(str, blockStarts)) + "," + thickStart = chromStart + thickEnd = chromEnd + itemRgb = "0" + words = [ + chrom, + str(chromStart), + str(chromEnd), + name, + str(score), + strand, + str(thickStart), + str(thickEnd), + itemRgb, + str(blockCount), + blockSizes, + blockStarts, + ] + line = "\t".join(words) + "\n" + return line + + def _format_psl(self, mask=False, wildcard="N"): + path = self.path + if not path: # alignment consists of gaps only + return 
"" + query = self.query + target = self.target + try: + qName = query.id + except AttributeError: + qName = "query" + try: + query = query.seq + except AttributeError: + pass + try: + tName = target.id + except AttributeError: + tName = "target" + try: + target = target.seq + except AttributeError: + pass + n1 = len(target) + n2 = len(query) + try: + seq1 = bytes(target) + except TypeError: # string + seq1 = bytes(target, "ASCII") + except UndefinedSequenceError: # sequence contents is unknown + seq1 = None + if path[0][1] < path[-1][1]: # mapped to forward strand + strand = "+" + seq2 = query + else: # mapped to reverse strand + strand = "-" + seq2 = reverse_complement(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + try: + seq2 = bytes(seq2) + except TypeError: # string + seq2 = bytes(seq2, "ASCII") + except UndefinedSequenceError: # sequence contents is unknown + seq2 = None + if wildcard is not None: + if mask == "upper": + wildcard = ord(wildcard.lower()) + else: + wildcard = ord(wildcard.upper()) + # variable names follow those in the PSL file format specification + matches = 0 + misMatches = 0 + repMatches = 0 + nCount = 0 + qNumInsert = 0 + qBaseInsert = 0 + tNumInsert = 0 + tBaseInsert = 0 + qSize = n2 + tSize = n1 + blockSizes = [] + qStarts = [] + tStarts = [] + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + if qStart > 0 and qEnd < qSize: + qNumInsert += 1 + qBaseInsert += qCount + qStart = qEnd + elif qCount == 0: + if tStart > 0 and tEnd < tSize: + tNumInsert += 1 + tBaseInsert += tCount + tStart = tEnd + else: + assert tCount == qCount + tStarts.append(tStart) + qStarts.append(qStart) + blockSizes.append(tCount) + if seq1 is None or seq2 is None: + # contents of at least one sequence is unknown; + # count all alignments as matches: + matches += tCount + else: + s1 = seq1[tStart:tEnd] + s2 = seq2[qStart:qEnd] + if mask == "lower": + for u1, u2, c1 in 
zip(s1.upper(), s2.upper(), s1): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + if u1 == c1: + matches += 1 + else: + repMatches += 1 + else: + misMatches += 1 + elif mask == "upper": + for u1, u2, c1 in zip(s1.lower(), s2.lower(), s1): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + if u1 == c1: + matches += 1 + else: + repMatches += 1 + else: + misMatches += 1 + else: + for u1, u2 in zip(s1.upper(), s2.upper()): + if u1 == wildcard or u2 == wildcard: + nCount += 1 + elif u1 == u2: + matches += 1 + else: + misMatches += 1 + tStart = tEnd + qStart = qEnd + tStart = tStarts[0] # start of alignment in target + qStart = qStarts[0] # start of alignment in query + tEnd = tStarts[-1] + blockSizes[-1] # end of alignment in target + qEnd = qStarts[-1] + blockSizes[-1] # end of alignment in query + if strand == "-": + qStart, qEnd = qSize - qEnd, qSize - qStart + blockCount = len(blockSizes) + blockSizes = ",".join(map(str, blockSizes)) + "," + qStarts = ",".join(map(str, qStarts)) + "," + tStarts = ",".join(map(str, tStarts)) + "," + words = [ + str(matches), + str(misMatches), + str(repMatches), + str(nCount), + str(qNumInsert), + str(qBaseInsert), + str(tNumInsert), + str(tBaseInsert), + strand, + qName, + str(qSize), + str(qStart), + str(qEnd), + tName, + str(tSize), + str(tStart), + str(tEnd), + str(blockCount), + blockSizes, + qStarts, + tStarts, + ] + line = "\t".join(words) + "\n" + return line + + def _format_sam(self): + query = self.query + target = self.target + try: + qName = query.id + except AttributeError: + qName = "query" + else: + query = query.seq + try: + rName = target.id + except AttributeError: + rName = "target" + else: + target = target.seq + n1 = len(target) + n2 = len(query) + pos = None + qSize = n2 + tSize = n1 + cigar = [] + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + flag = 0 + seq = query + else: # mapped to reverse strand + flag = 16 + seq = 
reverse_complement(query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + try: + seq = bytes(seq) + except TypeError: # string + pass + else: + seq = str(seq, "ASCII") + tStart, qStart = path[0] + for tEnd, qEnd in path[1:]: + tCount = tEnd - tStart + qCount = qEnd - qStart + if tCount == 0: + length = qCount + if pos is None or tEnd == tSize: + operation = "S" + else: + operation = "I" + qStart = qEnd + elif qCount == 0: + if tStart > 0 and tEnd < tSize: + length = tCount + operation = "D" + else: + operation = None + tStart = tEnd + else: + assert tCount == qCount + if pos is None: + pos = tStart + tStart = tEnd + qStart = qEnd + operation = "M" + length = tCount + if operation is not None: + cigar.append(str(length) + operation) + mapQ = 255 # not available + rNext = "*" + pNext = 0 + tLen = 0 + qual = "*" + cigar = "".join(cigar) + tag = "AS:i:%d" % int(round(self.score)) + words = [ + qName, + str(flag), + rName, + str(pos + 1), # 1-based coordinates + str(mapQ), + cigar, + rNext, + str(pNext), + str(tLen), + seq, + qual, + tag, + ] + line = "\t".join(words) + "\n" + return line + + def __str__(self): + return self.format() + + def __len__(self): + """Return the number of sequences in the alignment, which is always 2.""" + return 2 + + @property + def shape(self): + """Return the shape of the alignment as a tuple of two integer values. + + The first integer value is the number of sequences in the alignment as + returned by len(alignment), which is always 2 for pairwise alignments. + + The second integer value is the number of columns in the alignment when + it is printed, and is equal to the sum of the number of matches, number + of mismatches, and the total length of gaps in the target and query. + Sequence sections beyond the aligned segment are not included in the + number of columns. 
+ + For example, + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> aligner.mode = "global" + >>> alignments = aligner.align("GACCTG", "CGATCG") + >>> alignment = alignments[0] + >>> print(alignment) + -GACCT-G + -||--|-| + CGA--TCG + + >>> len(alignment) + 2 + >>> alignment.shape + (2, 8) + >>> aligner.mode = "local" + >>> alignments = aligner.align("GACCTG", "CGATCG") + >>> alignment = alignments[0] + >>> print(alignment) + GACCT-G + ||--|-| + CGA--TCG + + >>> len(alignment) + 2 + >>> alignment.shape + (2, 7) + """ + path = self.path + if path[0][1] > path[-1][1]: # mapped to reverse strand + n2 = len(self.query) + path = tuple((c1, n2 - c2) for (c1, c2) in path) + start = path[0] + n = len(start) + m = 0 + for end in path[1:]: + m += max(e - s for s, e in zip(start, end)) + start = end + return (n, m) + + @property + def aligned(self): + """Return the indices of subsequences aligned to each other. + + This property returns the start and end indices of subsequences + in the target and query sequence that were aligned to each other. + If the alignment between target (t) and query (q) consists of N + chunks, you get two tuples of length N: + + (((t_start1, t_end1), (t_start2, t_end2), ..., (t_startN, t_endN)), + ((q_start1, q_end1), (q_start2, q_end2), ..., (q_startN, q_endN))) + + For example, + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> alignments = aligner.align("GAACT", "GAT") + >>> alignment = alignments[0] + >>> print(alignment) + GAACT + ||--| + GA--T + + >>> alignment.aligned + (((0, 2), (4, 5)), ((0, 2), (2, 3))) + >>> alignment = alignments[1] + >>> print(alignment) + GAACT + |-|-| + G-A-T + + >>> alignment.aligned + (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3))) + + Note that different alignments may have the same subsequences + aligned to each other. 
In particular, this may occur if alignments + differ from each other in terms of their gap placement only: + + >>> aligner.mismatch_score = -10 + >>> alignments = aligner.align("AAACAAA", "AAAGAAA") + >>> len(alignments) + 2 + >>> print(alignments[0]) + AAAC-AAA + |||--||| + AAA-GAAA + + >>> alignments[0].aligned + (((0, 3), (4, 7)), ((0, 3), (4, 7))) + >>> print(alignments[1]) + AAA-CAAA + |||--||| + AAAG-AAA + + >>> alignments[1].aligned + (((0, 3), (4, 7)), ((0, 3), (4, 7))) + + The property can be used to identify alignments that are identical + to each other in terms of their aligned sequences. + """ + segments1 = [] + segments2 = [] + path = self.path + if path[0][1] < path[-1][1]: # mapped to forward strand + i1, i2 = path[0] + for node in path[1:]: + j1, j2 = node + if j1 > i1 and j2 > i2: + segment1 = (i1, j1) + segment2 = (i2, j2) + segments1.append(segment1) + segments2.append(segment2) + i1, i2 = j1, j2 + else: # mapped to reverse strand + n2 = len(self.query) + i1, i2 = path[0] + i2 = n2 - i2 + for node in path[1:]: + j1, j2 = node + j2 = n2 - j2 + if j1 > i1 and j2 > i2: + segment1 = (i1, j1) + segment2 = (n2 - i2, n2 - j2) + segments1.append(segment1) + segments2.append(segment2) + i1, i2 = j1, j2 + return tuple(segments1), tuple(segments2) + + def sort(self, key=None, reverse=False): + """Sort the sequences of the alignment in place. + + By default, this sorts the sequences alphabetically using their id + attribute if available, or by their sequence contents otherwise. + For example, + + >>> from Bio.Align import PairwiseAligner + >>> aligner = PairwiseAligner() + >>> aligner.gap_score = -1 + >>> alignments = aligner.align("AATAA", "AAGAA") + >>> len(alignments) + 1 + >>> alignment = alignments[0] + >>> print(alignment) + AATAA + ||.|| + AAGAA + + >>> alignment.sort() + >>> print(alignment) + AAGAA + ||.|| + AATAA + + + Alternatively, a key function can be supplied that maps each sequence + to a sort value. 
For example, you could sort on the GC content of each + sequence. + + >>> from Bio.SeqUtils import GC + >>> alignment.sort(key=GC) + >>> print(alignment) + AATAA + ||.|| + AAGAA + + + You can reverse the sort order by passing `reverse=True`: + + >>> alignment.sort(key=GC, reverse=True) + >>> print(alignment) + AAGAA + ||.|| + AATAA + + + The sequences are now sorted by decreasing GC content value. + """ + path = self.path + sequences = self.target, self.query + if key is None: + try: + values = [sequence.id for sequence in sequences] + except AttributeError: + values = sequences + else: + values = [key(sequence) for sequence in sequences] + indices = sorted(range(len(sequences)), key=values.__getitem__, reverse=reverse) + sequences = [sequences[index] for index in indices] + self.target, self.query = sequences + path = tuple(tuple(row[index] for index in indices) for row in path) + self.path = path + + def map(self, alignment): + r"""Map the alignment to self.target and return the resulting alignment. + + Here, self.query and alignment.target are the same sequence. 
+ + A typical example is where self is the pairwise alignment between a + chromosome and a transcript, the argument is the pairwise alignment + between the transcript and a sequence (e.g., as obtained by RNA-seq), + and we want to find the alignment of the sequence to the chromosome: + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> aligner.mode = 'local' + >>> aligner.open_gap_score = -1 + >>> aligner.extend_gap_score = 0 + >>> chromosome = "AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA" + >>> transcript = "CCCCCCCGGGGGG" + >>> alignments1 = aligner.align(chromosome, transcript) + >>> len(alignments1) + 1 + >>> alignment1 = alignments1[0] + >>> print(alignment1) + AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA + |||||||-----------|||||| + CCCCCCC-----------GGGGGG + + >>> sequence = "CCCCGGGG" + >>> alignments2 = aligner.align(transcript, sequence) + >>> len(alignments2) + 1 + >>> alignment2 = alignments2[0] + >>> print(alignment2) + CCCCCCCGGGGGG + |||||||| + CCCCGGGG + + >>> alignment = alignment1.map(alignment2) + >>> print(alignment) + AAAAAAAACCCCCCCAAAAAAAAAAAGGGGGGAAAAAAAA + ||||-----------|||| + CCCC-----------GGGG + + >>> format(alignment, "psl") + '8\t0\t0\t0\t0\t0\t1\t11\t+\tquery\t8\t0\t8\ttarget\t40\t11\t30\t2\t4,4,\t0,4,\t11,26,\n' + + Mapping the alignment does not depend on the sequence contents. 
If we + delete the sequence contents, the same alignment is found in PSL format + (though we obviously lose the ability to print the sequence alignment): + + >>> alignment1.target = Seq(None, len(alignment1.target)) + >>> alignment1.query = Seq(None, len(alignment1.query)) + >>> alignment2.target = Seq(None, len(alignment2.target)) + >>> alignment2.query = Seq(None, len(alignment2.query)) + >>> alignment = alignment1.map(alignment2) + >>> format(alignment, "psl") + '8\t0\t0\t0\t0\t0\t1\t11\t+\tquery\t8\t0\t8\ttarget\t40\t11\t30\t2\t4,4,\t0,4,\t11,26,\n' + """ + from numpy import array + + alignment1, alignment2 = self, alignment + if len(alignment1.query) != len(alignment2.target): + raise ValueError( + "length of alignment1 query sequence (%d) != length of alignment2 target sequence (%d)" + % (len(alignment1.query), len(alignment2.target)) + ) + target = alignment1.target + query = alignment2.query + path1 = alignment1.path + path2 = alignment2.path + n1 = len(alignment1.query) + n2 = len(alignment2.query) + if path1[0][1] < path1[-1][1]: # mapped to forward strand + strand1 = "+" + else: # mapped to reverse strand + strand1 = "-" + if path2[0][1] < path2[-1][1]: # mapped to forward strand + strand2 = "+" + else: # mapped to reverse strand + strand2 = "-" + path1 = array(path1) + path2 = array(path2) + if strand1 == "+": + if strand2 == "-": # mapped to reverse strand + path2[:, 1] = n2 - path2[:, 1] + else: # mapped to reverse strand + path1[:, 1] = n1 - path1[:, 1] + path2[:, 0] = n1 - path2[::-1, 0] + if strand2 == "+": + path2[:, 1] = n2 - path2[::-1, 1] + else: # mapped to reverse strand + path2[:, 1] = path2[::-1, 1] + path = [] + tEnd, qEnd = sys.maxsize, sys.maxsize + path1 = iter(path1) + tStart1, qStart1 = sys.maxsize, sys.maxsize + for tEnd1, qEnd1 in path1: + if tStart1 < tEnd1 and qStart1 < qEnd1: + break + tStart1, qStart1 = tEnd1, qEnd1 + tStart2, qStart2 = sys.maxsize, sys.maxsize + for tEnd2, qEnd2 in path2: + while qStart2 < qEnd2 and tStart2 < 
tEnd2: + while True: + if tStart2 < qStart1: + if tEnd2 < qStart1: + size = tEnd2 - tStart2 + else: + size = qStart1 - tStart2 + break + elif tStart2 < qEnd1: + offset = tStart2 - qStart1 + if tEnd2 > qEnd1: + size = qEnd1 - tStart2 + else: + size = tEnd2 - tStart2 + qStart = qStart2 + tStart = tStart1 + offset + if tStart > tEnd and qStart > qEnd: + # adding a gap both in target and in query; + # add gap to target first: + path.append([tStart, qEnd]) + qEnd = qStart2 + size + tEnd = tStart + size + path.append([tStart, qStart]) + path.append([tEnd, qEnd]) + break + tStart1, qStart1 = sys.maxsize, sys.maxsize + for tEnd1, qEnd1 in path1: + if tStart1 < tEnd1 and qStart1 < qEnd1: + break + tStart1, qStart1 = tEnd1, qEnd1 + else: + size = qEnd2 - qStart2 + break + qStart2 += size + tStart2 += size + tStart2, qStart2 = tEnd2, qEnd2 + if strand1 != strand2: + path = tuple((c1, n2 - c2) for (c1, c2) in path) + alignment = PairwiseAlignment(target, query, path, None) + return alignment + + @property + def substitutions(self): + """Return an Array with the number of substitutions of letters in the alignment. 
+ + As an example, consider a sequence alignment of two RNA sequences: + + >>> from Bio.Align import PairwiseAligner + >>> target = "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG" # human spliceosomal small nuclear RNA U1 + >>> query = "ATACTTACCTGACAGGGGAGGCACCATGATCACACAGGTGGTCCTCCCAGGGCGAGGCTCTTCCATTGCACTGCGGGAGGGTTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGTATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTATCCCCCG" # sea lamprey spliceosomal small RNA U1 + >>> aligner = PairwiseAligner() + >>> aligner.gap_score = -10 + >>> alignments = aligner.align(target, query) + >>> len(alignments) + 1 + >>> alignment = alignments[0] + >>> print(alignment) + ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG + |||||||||||.||||||||..|||||||||||..|||||||..|||||||||||||||..|||||||||||.|||..|.|.|||||||||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||||.|||||.| + ATACTTACCTGACAGGGGAGGCACCATGATCACACAGGTGGTCCTCCCAGGGCGAGGCTCTTCCATTGCACTGCGGGAGGGTTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGTATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTATCCCCCG + + >>> m = alignment.substitutions + >>> print(m) + A C G T + A 28.0 1.0 2.0 1.0 + C 0.0 39.0 1.0 2.0 + G 2.0 0.0 45.0 0.0 + T 2.0 5.0 1.0 35.0 + + + Note that the matrix is not symmetric: rows correspond to the target + sequence, and columns to the query sequence. 
For example, the number + of T's in the target sequence that are aligned to a C in the query + sequence is + + >>> m['T', 'C'] + 5.0 + + and the number of C's in the query sequence tat are aligned to a T in + the query sequence is + + >>> m['C', 'T'] + 2.0 + + For some applications (for example, to define a scoring matrix from + the substitution matrix), a symmetric matrix may be preferred, which + can be calculated as follows: + + >>> m += m.transpose() + >>> m /= 2.0 + >>> print(m) + A C G T + A 28.0 0.5 2.0 1.5 + C 0.5 39.0 0.5 3.5 + G 2.0 0.5 45.0 0.5 + T 1.5 3.5 0.5 35.0 + + + The matrix is now symmetric, with counts divided equally on both sides + of the diagonal: + + >>> m['C', 'T'] + 3.5 + >>> m['T', 'C'] + 3.5 + + The total number of substitutions between T's and C's in the alignment + is 3.5 + 3.5 = 7. + """ + target = self.target + try: + target = target.seq + except AttributeError: + pass + query = self.query + try: + query = query.seq + except AttributeError: + pass + sequences = (str(target), str(query)) + letters = set.union(*[set(sequence) for sequence in sequences]) + letters = "".join(sorted(letters)) + m = substitution_matrices.Array(letters, dims=2) + n = len(sequences) + for i1 in range(n): + path1 = [p[i1] for p in self.path] + sequence1 = sequences[i1] + for i2 in range(i1 + 1, n): + path2 = [p[i2] for p in self.path] + sequence2 = sequences[i2] + start1, start2 = sys.maxsize, sys.maxsize + for end1, end2 in zip(path1, path2): + if start1 < end1 and start2 < end2: # aligned + segment1 = sequence1[start1:end1] + segment2 = sequence2[start2:end2] + for c1, c2 in zip(segment1, segment2): + m[c1, c2] += 1.0 + start1, start2 = end1, end2 + return m + + +class PairwiseAlignments: + """Implements an iterator over pairwise alignments returned by the aligner. + + This class also supports indexing, which is fast for increasing indices, + but may be slow for random access of a large number of alignments. 
+ + Note that pairwise aligners can return an astronomical number of alignments, + even for relatively short sequences, if they align poorly to each other. We + therefore recommend to first check the number of alignments, accessible as + len(alignments), which can be calculated quickly even if the number of + alignments is very large. + """ + + def __init__(self, seqA, seqB, score, paths): + """Initialize a new PairwiseAlignments object. + + Arguments: + - seqA - The first sequence, as a plain string, without gaps. + - seqB - The second sequence, as a plain string, without gaps. + - score - The alignment score. + - paths - An iterator over the paths in the traceback matrix; + each path defines one alignment. + + You would normally obtain an PairwiseAlignments object by calling + aligner.align(seqA, seqB), where aligner is a PairwiseAligner object. + """ + self.seqA = seqA + self.seqB = seqB + self.score = score + self.paths = paths + self.index = -1 + + def __len__(self): + return len(self.paths) + + def __getitem__(self, index): + if index == self.index: + return self.alignment + if index < self.index: + self.paths.reset() + self.index = -1 + while self.index < index: + try: + alignment = next(self) + except StopIteration: + raise IndexError("index out of range") from None + return alignment + + def __iter__(self): + self.paths.reset() + self.index = -1 + return self + + def __next__(self): + path = next(self.paths) + self.index += 1 + alignment = PairwiseAlignment(self.seqA, self.seqB, path, self.score) + self.alignment = alignment + return alignment + + +class PairwiseAligner(_aligners.PairwiseAligner): + """Performs pairwise sequence alignment using dynamic programming. + + This provides functions to get global and local alignments between two + sequences. A global alignment finds the best concordance between all + characters in two sequences. A local alignment finds just the + subsequences that align the best. 
+ + To perform a pairwise sequence alignment, first create a PairwiseAligner + object. This object stores the match and mismatch scores, as well as the + gap scores. Typically, match scores are positive, while mismatch scores + and gap scores are negative or zero. By default, the match score is 1, + and the mismatch and gap scores are zero. Based on the values of the gap + scores, a PairwiseAligner object automatically chooses the appropriate + alignment algorithm (the Needleman-Wunsch, Smith-Waterman, Gotoh, or + Waterman-Smith-Beyer global or local alignment algorithm). + + Calling the "score" method on the aligner with two sequences as arguments + will calculate the alignment score between the two sequences. + Calling the "align" method on the aligner with two sequences as arguments + will return a generator yielding the alignments between the two + sequences. + + Some examples: + + >>> from Bio import Align + >>> aligner = Align.PairwiseAligner() + >>> alignments = aligner.align("TACCG", "ACG") + >>> for alignment in sorted(alignments): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 3.0: + TACCG + -|-|| + -A-CG + + Score = 3.0: + TACCG + -||-| + -AC-G + + + Specify the aligner mode as local to generate local alignments: + + >>> aligner.mode = 'local' + >>> alignments = aligner.align("TACCG", "ACG") + >>> for alignment in sorted(alignments): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 3.0: + TACCG + |-|| + A-CG + + Score = 3.0: + TACCG + ||-| + AC-G + + + Do a global alignment. Identical characters are given 2 points, + 1 point is deducted for each non-identical character. + + >>> aligner.mode = 'global' + >>> aligner.match_score = 2 + >>> aligner.mismatch_score = -1 + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... 
+ Score = 6.0: + TACCG + -||-| + -AC-G + + Score = 6.0: + TACCG + -|-|| + -A-CG + + + Same as above, except now 0.5 points are deducted when opening a + gap, and 0.1 points are deducted when extending it. + + >>> aligner.open_gap_score = -0.5 + >>> aligner.extend_gap_score = -0.1 + >>> aligner.target_end_gap_score = 0.0 + >>> aligner.query_end_gap_score = 0.0 + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 5.5: + TACCG + -|-|| + -A-CG + + Score = 5.5: + TACCG + -||-| + -AC-G + + + The alignment function can also use known matrices already included in + Biopython: + + >>> from Bio.Align import substitution_matrices + >>> aligner = Align.PairwiseAligner() + >>> aligner.substitution_matrix = substitution_matrices.load("BLOSUM62") + >>> alignments = aligner.align("KEVLA", "EVL") + >>> alignments = list(alignments) + >>> print("Number of alignments: %d" % len(alignments)) + Number of alignments: 1 + >>> alignment = alignments[0] + >>> print("Score = %.1f" % alignment.score) + Score = 13.0 + >>> print(alignment) + KEVLA + -|||- + -EVL- + + + You can also set the value of attributes directly during construction + of the PairwiseAligner object by providing them as keyword arguemnts: + + >>> aligner = Align.PairwiseAligner(mode='global', match_score=2, mismatch_score=-1) + >>> for alignment in aligner.align("TACCG", "ACG"): + ... print("Score = %.1f:" % alignment.score) + ... print(alignment) + ... + Score = 6.0: + TACCG + -||-| + -AC-G + + Score = 6.0: + TACCG + -|-|| + -A-CG + + + """ + + def __init__(self, **kwargs): + """Initialize a new PairwiseAligner with the keyword arguments as attributes. + + Loops over the keyword arguments and sets them as attributes on the object. 
+ """ + super().__init__() + for name, value in kwargs.items(): + setattr(self, name, value) + + def __setattr__(self, key, value): + if key not in dir(_aligners.PairwiseAligner): + # To prevent confusion, don't allow users to create new attributes. + # On CPython, __slots__ can be used for this, but currently + # __slots__ does not behave the same way on PyPy at least. + raise AttributeError("'PairwiseAligner' object has no attribute '%s'" % key) + _aligners.PairwiseAligner.__setattr__(self, key, value) + + def align(self, seqA, seqB, strand="+"): + """Return the alignments of two sequences using PairwiseAligner.""" + if isinstance(seqA, (Seq, MutableSeq)): + sA = bytes(seqA) + else: + sA = seqA + if strand == "+": + sB = seqB + else: # strand == "-": + sB = reverse_complement(seqB) + if isinstance(sB, (Seq, MutableSeq)): + sB = bytes(sB) + score, paths = _aligners.PairwiseAligner.align(self, sA, sB, strand) + alignments = PairwiseAlignments(seqA, seqB, score, paths) + return alignments + + def score(self, seqA, seqB, strand="+"): + """Return the alignments score of two sequences using PairwiseAligner.""" + if isinstance(seqA, (Seq, MutableSeq)): + seqA = bytes(seqA) + if strand == "-": + seqB = reverse_complement(seqB) + if isinstance(seqB, (Seq, MutableSeq)): + seqB = bytes(seqB) + return _aligners.PairwiseAligner.score(self, seqA, seqB, strand) + + def __getstate__(self): + state = { + "wildcard": self.wildcard, + "target_internal_open_gap_score": self.target_internal_open_gap_score, + "target_internal_extend_gap_score": self.target_internal_extend_gap_score, + "target_left_open_gap_score": self.target_left_open_gap_score, + "target_left_extend_gap_score": self.target_left_extend_gap_score, + "target_right_open_gap_score": self.target_right_open_gap_score, + "target_right_extend_gap_score": self.target_right_extend_gap_score, + "query_internal_open_gap_score": self.query_internal_open_gap_score, + "query_internal_extend_gap_score": 
self.query_internal_extend_gap_score, + "query_left_open_gap_score": self.query_left_open_gap_score, + "query_left_extend_gap_score": self.query_left_extend_gap_score, + "query_right_open_gap_score": self.query_right_open_gap_score, + "query_right_extend_gap_score": self.query_right_extend_gap_score, + "mode": self.mode, + } + if self.substitution_matrix is None: + state["match_score"] = self.match_score + state["mismatch_score"] = self.mismatch_score + else: + state["substitution_matrix"] = self.substitution_matrix + return state + + def __setstate__(self, state): + self.wildcard = state["wildcard"] + self.target_internal_open_gap_score = state["target_internal_open_gap_score"] + self.target_internal_extend_gap_score = state[ + "target_internal_extend_gap_score" + ] + self.target_left_open_gap_score = state["target_left_open_gap_score"] + self.target_left_extend_gap_score = state["target_left_extend_gap_score"] + self.target_right_open_gap_score = state["target_right_open_gap_score"] + self.target_right_extend_gap_score = state["target_right_extend_gap_score"] + self.query_internal_open_gap_score = state["query_internal_open_gap_score"] + self.query_internal_extend_gap_score = state["query_internal_extend_gap_score"] + self.query_left_open_gap_score = state["query_left_open_gap_score"] + self.query_left_extend_gap_score = state["query_left_extend_gap_score"] + self.query_right_open_gap_score = state["query_right_open_gap_score"] + self.query_right_extend_gap_score = state["query_right_extend_gap_score"] + self.mode = state["mode"] + substitution_matrix = state.get("substitution_matrix") + if substitution_matrix is None: + self.match_score = state["match_score"] + self.mismatch_score = state["mismatch_score"] + else: + self.substitution_matrix = substitution_matrix + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc 
b/code/lib/Bio/Align/__pycache__/AlignInfo.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7955c2f6aedeca9879cb2f5613c13d55aaffcdf4 GIT binary patch literal 16101 zcmc&*OKcohcCD)Ze?(C-N0w}PT;s7U(xEz}{EfzuO;fhU9-HG>oROz3xn1n4CQEE~ zlU3Cc*;AbiEGL;{VHg-7n7S!EFfNEWp)f&@q~vkH(+U<8Alb6@?M zrpFGlNDF+b@B7?$-#zEv`|9Om$Lbn>&uY!?UtQO$cI? z+q(R3v<>`U=ofa1?V_$-)$F2O`cSh=ca3%lXJxyBvkK12cHOSpwGZ`n#Wt>K3ytx= zys{s7RyQ!+L2u~oSb;knm_2W}V_JQ8bFkwK0&}==%jpJwt6qO?U=BUo@yuXo2Ht^b z4Q$i8J#=kzYqaTn@fGuq8*Jg>fo~3bW^Xj;vVMQDo*tTaw%qQP={dp38<_rRXU9Ul z!NIOG)gxj{0_e!!dVp!yTYQ-M;1fj*k;F*m8YyXK0W5POJV4$V^*X z(4)dNXRphXkPfL#V3u);Urcc`OnWA{qIP%_-`p zwdH453E3UHgG~s^k!n20AN2#7C2Nq$xuxpgu?|cJGl_{u?LHL7-P{VyE~e5M_#=wE zHnfBxV5v}ce@w}IqXz`_q-y;@t2xc!GE8>xq9iw4}8b- zZ`^WsmVaw$>A4##0}E~0)>Ai}!EOIW&|ALohP!dYcLV3buGQVPHXZ-Q%kJ zSKrpWB_4(4Gn!r4F67RN`LnRHUGYM@6jqX_U(g1pva^zg_e;-d1?>ebtlFgqwMlhv z={huFQb+5J{mWqmZ9h9{;+^IhEo^KTy}t{aoBE`#u1p$xpQiT=@6)h;uM{@#>$2)nEc>m3Mw+;MX42|csNpbHXjG}&5=hcsOuN9QT#$6p{^{)OfG;tf!@LG5pJvgJO zH?nuHYe6N11fZj3>j{YW`=fwH$TE8tlqy>lzOy%S23?1GcGTLXQpx(YhmJouADA{) z`rts;3+ftlVPI+W`YrRd04t#1H*by5!X}LGmNy)2!nDTgmsO?BVKKY*$T2q#U{}>4 z4Rv=of|ieVr7udk*)vgj8eCbRtOysA_vT$$Em#>>VfoE<7OS#SuB@#r(A1)PY0I(g z&1y_qq?7yG(Xw!3GN&q`s4vjmEK21GM3*Oio z_U%P=>kbVCI%L_r2jS%_TuiGQq6+Vh>%*L406QD*=4c2j*4?rQg=j}AfvH11j-4|m z%GwfnXb5*K8;_X%A#4X`%f*W#FM^}B&0GFx!w=kGB&Z5-_1yim6M|V*S7v2l(S%_# z-9Sz7j$_64$TIgvZg*RN)^@$%AXBegw^qE(5s})zl-6!u$b^IAb;v9gmU-LiLp?-5 zKo1XU>Us(>C^!q!^#YeTxrAqF%)kA zNn)nETNFAqQGOSa9(HzsQSNTvLEBI#-@)SW&2?|&aQZP4nYMU1YIp)h80a9Y&NP=( zg=OD3Sn6UP_lPaRD@ZL+69Fy3I!GYk!X0R2NJZrG&LPKbI>4NP=wH<3ezEtI* z4?K>Fd`6H?6;*a)#s*||(l*ZFn*S^c?R2qFFPt=rdeLYWX7scAj6Q3eGMaLwt~d3v z&T__R;{O@_lz}5-{7@d;rmH?0)gC_z)#?eV-TYM!)82_OZS4@IwGxyP+!`Uj}fVcmiOwD_f)yVGzzWB z6*`-)(66;V&*azVRK0860xQLj=Ba@{p8NMugkbmEHR!Hp8+Y{&^gq;k@LlcVht;2g zW8T##h5h|N$C}k{74GU}qc?k#;{N-A9>S}AP)qL^xbxjfDb((f#TLTi8SNI%a8yc< zc&&V|LJ#=9)F}6|D}|qG_sigkZ(P^*E^^I*(^aj%2B*8~3>=Ssm<5g6?OR<(Tw&Ts 
zc(CH50wVgOO#lRx_DHCoR`0}T1z#8f(1Be!M+++pkU^x}0KB2X&7^%QmLhAZ-T|!g z#6r9_+wJunk8Vs%dHl?u5xNrtSz3wU4M1Nx_qncKCxJq%WMkr#TV`(5%AqIN=3L54 zmy|P_Aoh>v&##44e$6Sigc)8g_$Qzc^*BZE%T~-J7y?pwP__O z_eeUBdC7-|e|3FjebqFVTF<0!Q!*>-7cR)<=cg{Ou3zQT%dMqs_^*Oyv!gxN6LKv6 zD2B0R-F6PQ7R&moh!$;{Pom}L&EH1(^i)If9ONHFqw?T%N2g|wbv?=6$%D^leY{RP z3~|At2JS%kB(#D~G+hGdCBKL{M7AjL4Ve72hkQDxQknMVRh>$lbI}(~crSrB%VjiPiK=r#c(P(cIbJc3~#r5BT7kYhZW`seFf&6Jy1ihuP2hYHERzX5!gF z=Kf&P6(zOA*D!xb-u;T_4RgHug4suuso*`W=XcUqe*+7&nrXc1yfLKZ0+ z30%5^ZhvIwDCycbuws9}%nf4;=)B=zzHDa~#Ey}f??9Y>;8^gUuy8E6Ll~ubKJL$X z4$+-nTcUD z)?@UHpB46+dqr4WU11}UYM~Grt)+HF^)Diel5?_Yh%LU#6+VQieei&$Ul(BkPf zp}$lgbY_YODjmR!>L7X7a}b&ZM#Pr|cv1DTbe(NXRgyzZ)pYIxji!Zwo(jQYk<;VG zNp7gAIfE4uX^6iLQzs?`!ELO|srGVqeYFcwnpVh~YnpW0CA`2j zC<^*K4g&42HqrMV0tX~(oShh>$}kBI7tJkUH%-jTE-OP^QlwDF41Z+O-+JqD73#{?J2BQSsK$9W0cT zU>kTA&bZ)!u=sY9);_rebJFi8CrWxk>%}6>c2xKvec+d9#;41n8D%)tvw9UaV0?B; zs)t@l={keNOhbuKU&m4fz19#OE`$YtHUM1@kssNvdS?-yH9*CUpm5jlTR~BgdAk6S z8W3)gBHuq9(pJoZq!;d$!;)RRZ%hh%FN0|7-tADIlo3e9gkZv|_iFdGZNvXvwlS!p z)jC=&qm|13-vkXjSB{;5pHHem^WL$rV3$x^ZU2w?Hd^_~q<%0H)^E*()ic_paa|jn zE^5K??Yj35y2gLDK56_&8yN2(gzSA1HbMh|=}+*+?7b7pL6|fT=E7Ro5bM=cmJ5!- zC#dJ-b{!7)6RM`*0lOMDPQh8R^$(06;fZ(fhf!59as-iK`)WNylgQEhuNsSWu zVUA2NfR^zH*t)#s8N=05prA1JP})wRD?9F zLjeoG5h4lXga9CVWAF>fKXXD?_G&ubyJE8I=KQZ$tc7G083kS&f9vpc)mtVi;6`5gK_FEP0GajF?14Vog+KqI%mo@ReyrdfImORXtDg(cj>s)$ch6 z>e{@#Ng>5?({?q{r0Dn|33!Z(L{*7ufcHj4-fvg?PA{P5wQE#Lq~v#$#-w=YWT)r;xdOKPVukN*FnGuUDi&{QxF)QejL7 z=D!L6DG7ko!YWK_O#r02=Usd)q=C08Fze_Q=qL>_Vz>7z$Qx?+ zs$r9K6z9dsvG7<}4;vt*e*)uNL(~;egAvR~?zw?1Xp^}?w0jmL_xNNMH8ub^_3(J8 zKL}zghW(c?_cP!QB})SlSxpn|v{!jImPBGIxg2W(4Ehj!FCPQSE$^dks<6aGy1Fr! zO=%;l!ByJ3U9bv-OUP^UeVVdUP}*dtN8CM$rNH-#-_BNl%6L&Bhb94ud}RqBV9@eF zic=un38_V_YwUPKl^k;zVsTY;;5@Lae?DKG3V5UzEUuKM9@h2Z(o;z@%!OlTKpE?nr*!dsw)cHMXv@34CPk%(EzRjz4QWMCa8tz zRyw4-DdEMGY~cA#plh!7w^TR>zOuZIJte!?RsgcIV+}@FHdHN1jpjv*+lZRV5MuL? 
zN-k*{QcdP*ijJ%aa*Ut3#wBmgpYs>QC8niPCt!e7w0J#BX43>$!1SN4k zCsa(PB$g^Fpu15szks7^qQu@ye80KtJ0p9D77%xgj$;@$@}|j35X#`aj0f9I$~EbT zM`x;C6T3ib&~C=P6{wHu0RlyRMJ04M(zg%ug9mbxl+^hYkl23#^=jX)iicM;&I0WX z{cNEKo}k#lEck&Tf4qaJ5aK~iBvzY+G9p5!_3@*5R&YcsT{@WjF+s(Ekhq0Cv>jmA z5L_@FE?5$lP;`$0bL!7aSIAbk1W5~mXW$*9z=R>-N zrow#eO8OMoUqXRtE~SN+d|$_M)1XK*QR z?QhBb_i?`jhj1;de60I(VOfq@J`Uc2_a?^&U>lR;sunPZ(wz371};$xxx?v1I2)e8 zP9wAd9`dXBsakdg{jY*u)U5Zwey`9M*ing3KyZu}$a^9WB%Tg@$FC8X$O!+@^dd)N zW1AhDqp4Uvag?z;L=eP0CEYm;USQ@^mX{F>wV^;F-C2s@nzQ$|Y&7^{mSC!aWP}P% zlC!usAU()_A((B`#w2~Cl}!3~kQy^sZrH!6y7PYO8*w;3t~rq~`hS3=#V)vsA`uD@ zrmD%C2Z-?dU*(v-CFTTp!Hrq-+Y9+F!ZdO-4n~vixCJPHJjq70ybdghImbRE>G)KK z^2Q7uLVn@bI?G?k^i6e$#3+e_B~rz#OY1qFGSz`YCh|z3NL*8iStS~h%5f(2Am0nw zLo_ukU=pE4Z4Jo#$PykA|2f4hLBpha_8qD%U5`~fgVb2R(xzkNO6e^VxKyuANf{

3&4O3NJHyBq^2)tP@ZR-?w_UU%PHh$o zciWr#*f9;0CVp)*h3ZIZI74+Jp0_G_yt3kBu7Pou} z1!>R8v17*pTE!09$C9g3UPX;RA%-)vqCJX@Q258pO}$u0Im1m*+z=(WJqsVc zNRJ;re-;0q!ZRlk+pXg&qq{SOIpa{&obg@NRYjA!x>^Jf7jc-a2rYSCM&T@UKwx(>} zly&<5FuDn8jfeAp^t@7cCgTqh3>;s2HANxi?j{BSTex6aX_PrHNOK|N7)w#8y7Z#XoZCF`B3yGz1pBQWSssVpxnTWMD?-e|wyg59`$-&Lz{Q6*k zkaVXL)jJ*aT@8+#oz5P-$@osC)3Jx$PRFC8;8D2Vw^>|Z!91UG=4kM|br!UNG8{gQa|p5+$YdmM zCnixU7`P1O5J%%4z2OAsk*2`+Jlw*uWue;KE;M9l&@t?x z7}nAKG;o)&=INZ)ys$UvvtuvFMmS&E%r|G^Q-c+A-CRu@xr!az%jS}~oL*VS*^>E` zxs+a6iJ!@?sAn$bo>`T*@Sf`Mk01fErC9tHBVU`?d4qkQ>a#Z$q?Dw}i1$Rs>CUix zpCL@I!;HYYNl=i<0DL`k^WDU&5>6qj=%#A^RRBto2f@}=ZuwKQcpjm=_$@&l&a4j) z2|AE;O@WUL9-oGRuchWgZOxG{6q0BsH`L0bSudAwzKd3F%GD{;#kI1^wL*gdh{h*b z@xG1gFb0Re$Kr@hq_!<~KMh*t;!vU!d?@~2BMYa&UEvp4={~JgtlaQ4>SE)eliO6t zXWw1A83S4jA~9!AM77~?iqc*Hvge7}sfbfmk*^ym)MUq?h*0ewR*~JFb4R*|Cb9D{ z`P_r+0J{{^dZbg!?9^10Hyu*k&HVl~W?kbCadL40~ zQMSG|J|=m}fBx#l|18XHD%v)}@gYW3X@!H5fHI1Uu|U+v#+c(}wl& zY}{~qb81Hh_i=~<{Q(ZZE%ht|WEajb&tjz80uT+di(%muzp&Ex&VuXVi!2`Rf0tPt zu+hudPM{;wgY5-pH1*uRDG{a9*pi^z_zsJ;hWLwXX=k)3( zL;%kDkzOAc;D+GzTx+=pQC!4LB^I?UBPzhKwwo$)A^AJ(<$}6JMM~wcynugD-A?Ce z%8EO-HS&l8xka_WZ5VYV>%b?PvHaxWuRm|&r(@1Q%}sPZROt8geTNSWzT(Pxa~$6e7?E_>9I6Y*Z}~XhnPw(u=qIzmB7*bO*T* zXQA9~B4EwWpCuODE+UB@wCiX+j+?ccJWiUIX>1#AHz#J9%9DbxQ=#zhQQ&GvP#vGz zRN-09>0Td=(;krJVOC`t@!`4VxhUEdBn?`K&B=!dLaJnrC8se7ZfqP%!YhY T-(rDTYkctYdm8_ZX0H4XbA)R^ literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54c2237ca90ed245893781fe4fd7bfb9d28a4b25 GIT binary patch literal 70400 zcmeIb3v^s}dLK4#3lDrJl{+rQT%Xa>DI$^KU2I{O#0IAAWVa1^g=DS3iFBxV`w*i(doy)#vu( zSAQwJK6pF5G*~*|4%|q2KaP3_H-?@u^8e#uqYO3uY{bxIOkd0v-* zI^V}=CKkq%&F*DsOUd)`;~{_NnqMo|c53BnWqGq$^U5nFe>~GnqmpLf^E*CKZi>xfk{p*rQGCF+8uY(ZszUeQpU}?xwjKbSv7e}xl>SITno zzMfvnyN|iOZXeQhyZ!C}(iPl6_W-_o+#&ZMzI)wa_aS`uxrf}t`0jT{+#~oNaF4pj 
z@IB}rcOS<00rzqDg!>7UG~_z&C-LN|v%abx3_<6p1dL_**3syF>{=jQeD%5^EL=y3&Z$SU%S znXh^g{?S#Zt?Ow zuXwbGZ$6!uuc@giBy!fu_46}NoSAv@Tz&0YeeT)mPfwpe_vxu=b^DP^JsT?Z$p=>I z6G5rxr{hXRKkt78&WB1p9apN1>*f0!*H51gm3sC;mHOnTL#3X1P^Er)I#lX8TdFLA zD;3AD0ssIqZ2^|}&MoXU*}&Xi+{Ql6rfci+4tsc`>?092um$Mm?Szz9Dz02dVH(oO ze&o6~$D1sFuK6kF3if}o#uQZ?B++fVR(`AGOL=HOTU7#yR7KUBp&bk$7T#Y^NCC37 zaSVHM`tV4UHq6RPY!<>7NA1nglfHAZJnEcu##HL5vO7W0sDF-+tBs3XlKbv^jJr6R zEOyhhCyU($?a5-7bz8Fdb=sCJe&Mwxi(g5SOu9-y&BasP@T*R71*ijY7q|RY*~N*Z z0i549p}zB0xyZFv_L#i18=BM**nI8u8<-I_2X`NPX$)sRoO^P*J$rnI)S)m3NL<>j z@%S^)n5P(V7vN+anm9Y*OuwPwJE7IwoGR>jZ^Ee+ZC>hfgZim+E_{rfHbu*L1gFUToen|j*ah6N?AUc2PL06{%8p&m;naZA zHg)`}?vy%yad%36wF{4%YM%GxccsLdM{Lg z0#5?-21Te2X`a0s#T6jjTujvs7f0)yg%CQ8!Lo4}DvDCpktGwSE|)4L5NOV|9nhk_ zQ>g-x-XYR`t+;aICh&P5fMs(Fr#FZ}wdTt;jjP8r$*q=rXW{uprwE*Xa|;W#%6dzR z{1*dSB~N=K79cC)pTPkcet+*jm|15!td+S~zR0)of` z+JUiABuY@}gdpBGua`g>VdQW{#|<4tQVWDn#ow`Hk5^hnB2w~(?o#7GEUJv74n#q1 z2lNZEi-i@Ewfy6E8lQQ5{5Nr_gSfrX?G4tzH@KbrQgSU(Pb??vN$-(*;!dibd@q?y z)YC|l_h!n8M!GUoOV`u2%fJbAk>!V}Mg-!RA2g$K!^W zjH<6D!W3k`%>aTxj3-Nhqsa>2EMf%;$tTyKO|~f9aDjms1e&S_U*1h(qPMOWua#F5lv{DmE4OCPoCC$F6#3%mp(9Ns8Vrko>luNqvSd7b9i9fK{yU1}t1t z8Sz%bT5;lwv*q$eMitG#W_jUO?Rv1guwTan)1NTt4i2CloI}1sZ6c+}51C)XMnGRz zKsZP@D?6Ll&{k}4L(^l~Xfi(NWu!pxt~9yNdFBCGt+*H#wq<9fwu4m?>_>%;g%)kV zEU0kQ3AV@@KtW^_2$f${e1RRGz`}yPE6+~|!~+Fn8%AW-t)e4<20AOpC3cYmtX1{7 z%kG4E3R9q#3||4aw)oaGibWPmsUF0q7qD*9#&Mk@*@|FYplVg%81BYKiJi`0UK|~t zaLVpeqjTIMnqgr&a4k01WGL6gQmr-9N^B<3*S4z5#m&vuT(T38Gp;9NuN5&j6VA#T zippg34DH7I*uP+3_)mS$C?;oPbk-Slfp?8I?xPsBY{T0Rr4hXQEN*^&{#ATN8^OkC zp*w;XGGPJ3=S;xxdBSWCJ7s!qaCT!-!cRH&4hKE_kYnq}pxUQwg$(IbXdLX@?>WWs zI5q2>;&_~D*cU;z_I}$$-*Za(&+NMYp%NDMCKd&c)d07s4-th)U@unI01y0~E%LZa z?%v5z`)~rN6URVa0RV(#9LdcLb2Pgb zZ*A%1J)t_9L+w2q&o(oDX=An7Ws1$41H(IjW7-?S%OE0Z z%ga++x6l*)qlU8mqqrna<&*hTE}2Od()naASxDlFyG&~D;keB)WlC5{F*p9w`ZJGj z|3zGCiF&e@1kx~61HwV10cb?xcG9~5G$Zp~qLHd)fr_NQo)lCfv7WorrI3%xkV}*# z2b5%?mRAUen;>Gbu=gFD=dF@AsRnd1G{}+p3!O^ip<`(Z@`jvZJnx;GFIG;~0P1U< 
z3M6I4qchCj9q;z~(K~N|mqA>bX_VI-UMtmKhbhis zDxd64_NO_wspQ`2_}T1S;7XE(;g1u|NtOwo!A(7}4%ry6#EBZF5EFZx_#-AZSpiE}@tfrZ+@@%A>+Fi=&x?4GJ29uTF`~DY8z1MS8Bw{a1o~gfAIO*d&mivYG(-_hB|KC6T(?5M5q(=gVmX~F@HB(^f>0Oso@K=^(>w@c+)X2{tw_%OT2@%a2uP(1tIp1Y$MM6TC#za;BrlTrk=W! z{%T?s0%*u2^GtsU>xINx<)VF;HSe%Cy0|t>8l=d$c_Hm^hZ?dh!R86bk1h83a`& zpewqR=_=fJ0|#)Z6d}p%v9H95cKLxUn}bAex4gNt$@;-0qMXqgGaC^iNAjl5MVUuz z5(zyiP0Rn-bT#w9L;(FOLf^)`r(oC0LA4{hIsD53mK+Z+;@7W9r+CnNh<~%>T(Mj9O{zBh3(K^Epbsp zCK1^qr$nX=Vzmvx?VW6C@5yq2tm9rGJpTK`fEN~$@KC0*${Bq85nSqtHwGJoAUGl3 zOTG-{knNP1G;hA5VyP1gOOa-51FA+THS=hVn|nSH+*7kWkQhS1yExbR32YYFp1MdU&|7kN>81GW} z1Tg`HR6q?P(aaUMwg7@;1e%%kYPn*-e}(|0nI(YpWh6|kf@^i@Uj6|J@mX2oaDkv7 zSk>{=C~&Mp$-U!u8y-6rM0^PJ=kf6!Tmb5UK@oRa2Okc&Q~JF`4P3O_$<+)G^YK*i zcag~g;`Sq#D@(IimDbO-owYSUrVuPz6et%ttHEC&cNJugoVy}EC;{^v8z*?ivQSzM zLE!=6D%u=jF6CC$_XCEk4<#Z%#Vy6x(Gpxh)R0gi#H_erxgb+<0|=>{BCVXU8O#3z z$XBSfGkrGL=-5rgGb__io?Ur5d<2!8>!n?DXaVzJk2XPi;^=~q1n7$hl|%gmI>$U4 zTVucu0@On=mo2Xfi6R(GMMrlN{5cNTTV<{J;(|$o!QUzS*Ml>WRalU~szmk02Uj#G ze{b>vu$Z8`-1u@FNc(huPnL1A)tr;NPmgE4$FO`_m-z&qnWfu;HUszJ;#poE}2w)j5>*aip4q7zy+DWXq|j!d7hx^Ymuikqf*oGr^; zJ&aI;5QKsLUnzpgr+9Y(FFuOF+(gP!Z3R59&<0Z2kl@$RZUb3@Alo*jjoEk?WaDE*3?`sd z2y~~sMSk&9qx<05g-%U#+zX;`_mbimd7)F*l%x=cyB8%Z>7LxZ0E`5PgqnV(*nyc< zv#m8Th)X`*!XgXd4tCz2jxx+bqY)&Dvd`v1{j_N=TzqbB;X9wZvhd8)_F2b<>LnOz zsdL7ep0OFF+3Wz&XxnCI?y^n5Hmmncpb@=r0%@^IVM>@=3@3CcYzqIdKUmvmeSa5-cz`1W^a06WHs|3sTETsjeP*fCw!6~H9yOQ29ic# zZ&dz)YI7hl)O>nH6{xe+YVmOK#=Qpx84Lvy*gdh2y?*blzK@*_#5_ZhrbpDZ^GVJm z%ga}}lDQHS4|P1jRmy#?-7f~HCXb^)gk~vga|X*;x6u3GO^ec+eJ8jiHm{u8afOYQ zr%F@Nw|2z2S#VgAkhcX+k!5FpY0juZSxq$>Q&MaRTbMBaAwDzxaAImU#8n)>s#Np^ z&x7(zNyLifRi#q{HTfHDJP=|B*GiP}ZIq#T6&&LoMR>tN1Lsp%p?8^okR#~j^76W! 
z{z6CcLlWBA2a*-AZ%JX1MZ5bbNsvp4H;#iF58k5?BFP&Wuh;x)B)5BC0j?+H$p3@| zr^ykn5yOXxjC*Lt0?0N8y>AWg+E$T4)W)t?w@RY&G45P1LQRQP5L*srQWVCi^i9dA zVIJNrqwqD7*;j#<9 z?}ycWF5nzLz>cy7{4;tYvRG{I@sw-A(TgEi3KvT&MZ)GNq|kgztO<;PR^_9zn$S&5 z%CP}xu;p9m0|PE&Fk*_;!>we7S|J(;AN3=G;kPA?X8|4~cstV16vnXu;a{n{UP*0K+k{D3X+K4Qstu^0J>C3mnbVX6G%;*CLyp9tXh zE2UF7xQUZf16I^woucDH3E-CDz>r}r@-^aea{P$}Nz2h@tX&_ZW~ypNo0{pu=wmR2 z(@Lh2CXAf$pw{gbc+!f-P6VB|@L@`27?Z*R3}e50|Hd+hUMNrEm;dH=ryv<7;8>BrsTUP@yp( zl0y)}6N3r)LjWo%`lY1)MZZ*GmQ)i}1IaPc90^axFh)^QMw=R&iWQN9I&XG3lTUTx z73$Fxelm*h!pCBYzl0xlH2Z5QqsObp5u^AhMiKmnFF_XUrr%CLs~7yj+bL?EHBvMn z+C2|U6WYD+k<&Q#da99WWGNF~&)vv*pW}Bu1C7Qk^dLW1Cx;?)A+i0rSHPoap-*aU&s3gb4E zX)>6CpT)CpeNM}CEKg!?-iU`qX45L8j?Xk{GsOwD;`0Q8FeQhM;seE*_SOYIcYDvn?Um zHMGdhs&@lR7Hq=sNfpC22_08jUu=wLj4c!+0_#W4*VDLK` zwP0x46SNOsNUE}kQ;I^jh)0e%d4dx8s#Q)dDlM^Gr8W*}3A#4ryhs~p+dfUWS=J!z zCRfCISXm`dN5F+fOL;>y>EM})iv$RrHC_cXNhyuEN~~y0)<+=mxuRnXtVMxLG|{jb zWn9~37GNmCeHE3v3|>b_=m%mdI?^Ia0lROd)20}mL=9A5IccIeY~tt#sQRv=zmOvZwb}3A|$()4SzW3>j+-60%zx@cO1@sV)38sqb0|Yo=V?@gi>B zf{_jiBgGF^s!s(mh(eWK8yfKz5G*^MaLgiF_7LxY`?^(Jvvwm8`E2o#S0=J8%08_; zakIMiXI5zIyjtEY!?*yt`^8|iXwRdLn28aPu;%LXxzr`k^@TUD%eqJ6u!_zEW* z^DDSy1b%a!XaQL)OH^qqs3oAynxXgt-uQT%Xx|5>E+#5a0v`gZEXE`C)Zs)#4P5Cs zr$x)Unc>cFW@Uf3vYuy9+?%8!NET!g9ksyGEtnOyYk`B%vv!QeYL1f8=D~$(?TT^W zS8~O=GN5vr>AmunP}oKK<{-B9Q0gzIF(5bCr7|b=QlOEVIS8?ms^UltaOs6B#=mQ`cx`21d=xl zas`6fBUEz*8B|N(PI=d<)vcZ?G3U64U07wnQADsoouJp(2l{E} zQJq8TLm@tXKk9_kUKC^9$S_yoS+jamdaC>{1s#M<0?u`=Pv|;9l|s3tX*s4o4(_@w zbM7N6Igzfo2NB#(H6zo<;`ia>SA;N&kr=crl+Ls_&@>RIP-R!Le!GsXV><;Zi;4@> zVsmNto7QVl{zZU0)8rCP4{T`woG^xg(nTk&#iDYo12_~Qc}8cC>#vn=5UHzniGa@O z-SCF3Frcl($Du#!z{g=YlXyCeJn*4{8&DZS2O4FANU`)h)><<>?M2ckN;Fh;D3kKB z7!)!UtypU?uxe^F1hB}?NXXS7x34mI0-;WqY|x$6et5{<3)El2KK5)&AyET)BJ_ZO z2{{(@QP4IGEgK$Q`vQenI|DPb=i&w;k~loZfR3@Ofp{Xf-RB1ei5uJ5r!vT*$nPs~ zRZCkc?U6%38_(C$7Tm&6FIcT6;T_Ea0hr~hH?;XoFnEeY#0-dGF`Q|@Yl1@;(QwFtgt? 
zW#b%#PqvmZz`K~`Gdml`ox#X%l~Wdk zm&PAClFF7$Ge~5_h@j}x-f-BJt)DGZLsefRBGf}7Rtid&MuoA6nZg?5Y{b8=L~6H z8kmC1(BBYu6-VMY45_dkce5LF3a3LTSkdS-t`=+VZ;~`;+2AmoiB)4;UdHs16A;T~ zcrfkrUS)g4Y}U5yPlA>$j2A>I+e`xhh#0n+m9yR>LqoA1#B}2Y{-jqR?+IMSbDC)( z9dD-L1<||8my5V8bpz&vh6ENr(g@by>woVar1u zkWUJ!6X{g)XzD253@7(uJ(|Iw&5q==5z#r-&FAs)$@7835&C{S0sa78goscV!t>16 zsT??{o*nyj+mC^>neTh#aGKl9`{=x#POU(pJt+wC6iD%a zRT$4}de16$A5U?j8!Z@6=;)p{UZ!U{qNpJch)=d1-K1nVn9hZr3ayYpM!SL8HQoYw zS?v6Fp2OkjaR|2DJk%xug#j*V6j{8EiVObvlA z&)d?NE<9Yp5VxSLp>=;YZr#t@bsHX`n9Nf%K* z2yVoKl=3puOnR-Nw)_P(=TCnwjcmx3*n2$gSd;x8k{0S*Fn&*C>ChRfOaO$3R2zkh zR1CFIcYG61ydp1xsR!u#H9SkQUeO!vx@-<_zWUWF%{I~nLbLNxat9be|w;mMRS!@izs zq;~JrAZoi!JIlXf#AEB-)PP(s-05LNvG-EYb9~FDT1Tp`MjjfBT@VcZ@C%809_hPr z4bm5OFV=dY{n$-C%QwyCT&`XiPCNlB z4g$r$|4L%}ZG65!(Hy)3_PvKUun|qU-4Dw*X1q|G!+;tGR2BGUCn0H7CIj^oP}Vkr zn|4|xp|(heM-G(`go{8_Fu-w0LPLNexXsSmluArSyTMBfb5`rNb-#TJ;kN+oaVmt^ zl}1&ODOxlY@hC>WZ*FpOE|h7eJ<}OL-m8Y{YR8LF?3wmVXWKI^E-qS>7MaerWD3Z) zPM9k)L7Na)sNJzvxt?voenZC=X~6&(wwzocW=rhJwa6+0DISMOsvgD^eo7rQ<{T@`y=eSO_)j|+J<(8xl%r%z`3*HM?b~d1p1nQq%3%6H~TGWZ%gs^`VXtQElTijj1*K7zm#xijj zvIT1f%fsg8Oa5cav!8#FeMnoA2Dow`J=wT+#cJ2l~)x_oNf znU%%&7^}dVKRtEU7RdU?z=du}$d$;1(T3_0=XHrsI*-Z1WWtaQh&^w?$O4;BV-`HU z8G3|FM5@Zr|LtE&ZNn#=obE&j;7QpN2Mn5*7-KT>LE!>Dt3XF<>*+5N17IDw#NPS% z(?5Jpg^>_b#0SYq?-i8by^IUIRKctJCj7mtwYG+5`1`}}&^L+CjQ4vhc+$-7RA}D| zPr?ka)XaZp>6X%JA{VfkBMt*)B{UC7=dHoOr3QL2%{VySz*YrQA7F#|xx1JRH(O+H-jMBy;4c zREWT#w-kGpu1~#LUcQ}|OT0YG%Vl1~aGU6$cb*q9R(gqdZ}4)BmmOXN=!|n(I z%J#cQ+@r`j;2v|2<9pD3*gb*o1MWHZlkO-|47uOtK7uC)B?K*l(MkYXgrYr!5V?=N z4diy|Fzz3BC**#_J?~DrpF++f?rC=#IgjG)jC)4j9&;ywT0YS{@ItZd-7Nbhd))8+ z@ry=Ti;}c0{hkN4K=aVqtN`E@kkKB_pp9F?r3iA^UBN>HUKq)pfP93&9TB8x@FTv0 z1nBW01MZbby=ic7q|hn=4UtYZM$SGXKm;|5y|V=m12{HRLCP5l$d?*b)Qm=#RH*Ub z)_N2>CK9TN@N1q6fUfarm%mK>oB^5`V2O53iDd+5!Vsp4{}LoiTbQM`Vbuz5;o~zl z#u5xQ$k~C#(ISRKm_K&tp(`g-Lhh8-GE=w%!y_POuZ}v!!vpy-T@Z(wl3GOZ_g9pR zo-Haoc?#8Nn>dLxCfu3efM=`hpIsXPFAPogF`p%*QJuGG)br5LQK+oK0^3MG)oWDY 
zz=G{G^5hiI7n@)h_MlY`0^2iT;fQ=LXx;*1VdvEBG_x|ckUp`!%?t-bdqwoV2Mfll z^YRyXY4Fklst6)~0wBgG)tl(c0aC*Q?9sM{LsF!og;ZNJle(SBk@Bcr%SG`QMk9sXD zWFP(AK){t>yjQ(Uw2ykZ<@ej)4Rqy~?^*Bu`U`|?-}SDcE5CBDdYNb+{mtG_z39p} z?pbfRzx2SuO@K@Z|Fc+*R)hOlvU_0Mic{@9N<*h_CV2J3ybK{B-7PSg(6 z2kL{$y!UnzETg`9q26~R0bTG~9rq&%XrlLsCV3aq=cDNx2jB#HsD6OVG%&UX8w1b| zKl(~y*8`iX??w`(`~php36+w+XDKXkP(oXvJl%HH;_mnsqt5SznQ%`%gI;njgW2V; z+zpQQQQTpU5zD5~Hq!^)9tmi4(CyXVePRGzz?$tZ{xE19%`{_xr`N#Od?R#(%*#M%I@K4owk zSKOcoQ*0UKniJDJn_X{zR^~&Svb6z>`q-&rs9!;LWk_zpsz}4zvJ@#B_9REni{GDbFbQ*mia=H_Bmg%wLuN@u4Cz_`s_5C>giKs2(ZXk$4J@rmv z1VWs8G+2rtuNW?}I=rv&@>N_IJ+8XS2ww`h75bk#Q^$hlevFlS@30|))7zRKxl7Fy z!`x-epoA59(UjL&zn_Y>c*`C`Qp6nNFJAOY7sJYp!$%3XMQta z$S6Lnfh@fY*n9{Zi4Y=p-JllNCyXG@J&%w7!?-A%yOvr_0b@P{yc-d4fJYODMzm?f zqXx|*zq0ss9#)iK5noN*D$r$2BdLY4UBIIktuVF#cef^8FYt5ZXry$?ZlwGRHs$cE zv`{PvhW|q*kAzA}Ei{uW<2_2_Gkfh84$fvbzeJI?nRBu5A*q~04?OIMgm7@1{c|;l zRG zauYs+0uwU)oMcWPCk(t;#3fP_o}<&B!#I826iW&PstN=Vq zfi4>ZhMo*PqwpKSMb}fIXBj+u0ngIr8JrbCP=5&29UK(q0cWZ-!zz*@Rm(|Yu#Q(3yqps!pq5)#l>YB2er#Bv7) zoav4*@Gern7l*|&jHs(ZNnI+|iq04+8wV58j#cMoy>Rup2i~j~iqv1II9Cx#x8yum zgrP?bQ7x}N7ikliqMZgycaj(hg*>8Tj7Tnvox5>$?(+}48&?hIe9oY8pZYwMm&Z^S zIwIgGU^9aExmomYsFqJU=&Vj6Biyo&bzCRRvk1CD8xRireTONOzuur2sp zwXI`Wq8M57bwT&JAs1+T!7W*V3o!JEdte{?NTN|;u1yxOhvm`aN)ei8E5@YT@4QwJ z$hzc3OOQvcB}66{ffC%6qUVyy-8Q)|%Gd|6Px~gLCa%iWX#{O^qpN`EnGyrodB4X@ zROYFL@Ze^2!g+JE?3;xm_+tCY3Ck@kT#}nh%5*^M+JZ-OYT;BcoN{O_rMLWzX8Kym zT}s12$KHXLw9>HJS%^`)fIzhU=A?rkf?DksOgq%;WoEWNP)P6tg$Y8c_1>CkoFC2H zjhhhAs1SuA8h~fBDZ!Jb2(R2++4)ocs0`ahGUAa!Fx0qi^)b7A4Gfu^R%npBjf*Uy_?@)VCL(2 zYQZ%M^^DsER%Cu8F-!yjT62tldze~nV9?6DZh41R*SqbzEc^r_ANAtU%6y}z-reZk z{S(xgb|um>7@)sr>aO>IHJJ1k>)rL<5wboJQ@7qHc~J+L!M&rt)LZ8YhldA|^(YxDxkgPY8Q+wSfeZ-UIf0%mxD4q*#*F0!8t2)Rx z5Vz4)GB%+vMbP$rObrw=;r#$F)ZZ{fqQ?ge-+>NHH+uy)LRfcz#OSY?W*6u`VjLp7 zYm)4PLI?ZR7hXwG8p>8t(xo)USpmDh(Icfn0D#g89ZQpP zw|^WvstbEP-{^lgx%&&~iG?cfCg1pJ(Ip_^b-dAr>@|Nn)_BVHbm#&CRi9xFc_Iv~V>Xo+3 
zc$ryH^*}@;a{-|epe0(D^5Al)aR7pr9N-m%OAscJkQ=NIj3nF?MtKNuatP32CxxxBX4T!%5xgs`J!K z%UuR{8M({ij;Bi+@IRxWVtzrfS$_)aTk%T?X}tfKm!HNZ3~lnQOeA3|2y{8m^aLl$ znDn4xJXx6H{U?0&v%LJLya+J*TfF<*ya+(}d%XL3UIa+|UEcj1F3l8_$5NX!&Eyui zUL|Ogy*ApMdbaEbEF^!>`@U6Ik+c0$Rv;ADOJIG@lEKEYyF*i zL|_QvNC8X>!Vp3gy5W*8!=nU@Qb&Gf4Ebn4D z=PILaS0krP1HJ~sfUhQ3lcKSeuV*!yLm=1H=mNOQL9N-q+~a{?+<`{QBd1Z=--;KnDl_{7fwmPhs76x;4Pdc#=|D39+UxA#t=)&o<7{OuH^<-JG^ z4K)4ifPHr0okZgxq`kv#U*n;Ay4L#+N@pj!}#?OehuL}Uq7(c z+c@O*-zn4&p@sp}aJYULe4D|A(9VhCA0H$im$Tij2Cf3ECXEo+CZyT`$2M|nLPbLvfDd{vY?MDI!40!p1=d|hj|g; zBT!4wAu1n%BkF^-LwD0k8CKxg6MO@{`j+rd$43-Uvqdgtr5A{w1Amta83OJDWms1f znwo`ch&F%YD)2DB*`>Pfd%wssbKBDOW~TNG5T?vFueq8Km3SxLOL-|AZ$g@ri_ zFEb$V2rq{LQ<>&<%WH>;6w1h=3~Ij+=JNgb>N4=umXSpn*-#mG!es#E$O7u*@eR$5 zuO$%ru1lVDBga{knn$U>qjK<#3n@aQ8)dUba{vEG*9bVPA$VFKb`H=k?REin<4$y0 zM%+9}%0>?$cz2^v!#z*QT+I-W+QjuOF))!TIuV z{doN-zE9L2cHx>I2-ZUVDDH@AJ&Sk4f~GxNA9Qm-%|K?>PvHAN{W!`_ZvWf*37zZh z`VpP0j*=geT=n{pnCnwPTyF9meeO3qm6H?e-dFLkkWs&KC14#(>Pz*-+{_$yd9NtUd7$F z$=w1B@A$SCM0UU2_2aHj?)u(MZ|AW-en-!4+WWoT-<0}cdWZUX?Nt9y+4}!ZO#Q!Y z>;H{=)c+3Ze|>i;+G|1CFJbP{XWiqBKcZ{Qc%!z)x!}i&TeKvwzAV_e9wid3?al>^$uE0cae?YFlJ8%zN z1ni_7?tqWrdQ`5*@w5xS9+F>2+Cz<#?f`VsPNIE-Xy2ptM}e;# z(5w=B)WHim@}id6VAAhGaHykFp52^WhQe7xApRe=6|U@zcS#$Ye7d?72knH2`M z_m9~2|BM$w6&_M(f$gyu;0rYCQ9ghFGVdrL^}2WwMhbC=W*R50_uul_Z}Rewd5Mxl z-mmZ(38wK=%1S0fo_aHHnAR=zLwkAwXvI>t#xL)0GPjUZ|AcqKr|HrHfu%ygJYFgU z5{IRn;!`gbwik9buT+TOEcI+(1m|A=DAYnPYQYOsPP7FI2mJEBfhw1}6wcy%|2Z2# zG{*ZiUj8XBzs}2l!OMTii%@Z-=bEV<&~7`L@AJ?2?!V&YzvhLQslieOLH#j2_!Ou3 z5OHMS48$Q6{t(7Vh)YC&6Z0^-1H@U5GM~m-_?%oJ@{9NnTp_&a^sg6r6{Sa`pvaxZ za|+4fbgyuh3Q0$CmRN48kYWujPg2KC3glM8Z{kQyNt%;*(lCM}M*q@n`| zfi+>-+kN;j2GG)!FSM|&G`-lFrp!a%APIUgiIk*1!<6N#8ZS|lB~zxTlL{(}4DhU` z9}~b8ayZw4*=Smky2u4UO+$Yc1e;Ox=TEz7Fies-i$+0-_5dFuQyn@HS~oWjYObI$ zm;x{uFsS(+Cj{}g0aXGH0C0hU zdLJ+cGN1eL4SiqIq`+*J>O(l|fpg&8PWqoiy1YDp0XnwBU;y<1XBlktSH1xJK;`%W zoStdm97*p>lA3zPU#cHyK=W2|5X%{8^i?K+e+;oK|F711fjQ_pyq^Ybn_~)=3+fXv 
z9xWKwnARXD;y(0@uzpxElz@eR4es_g9&!h8vZNb_@fpGW5qI!TKXs@Z-R^ zI9@-FGh%olL|jE_+V=?}?Na=BkA#1^KsWaxWm*ltFOHQp`VqR3SH6qXwAf`yPN3i^Hx~u4J+|=J65JBT5k| zF;s>n>>|!AGCmaJf`^!rU(Die6jN_BiH^i@FQ^v+G=iH!C_?JDmKI2^eEGw@hzLz}KAas%IXhq~ z*vRajMHwkKrjJB7^KE)V(T!uNp z;$@n9%wL9w1~>=?2S7tf}J6nh}PtF{kJLKF$H^W2?O5N3Joi3FRMG zp;}p=Ja_LuV2UkX{tho~^z8TJ={T$4o5W*8dKkJ#AwomYU4YW_;r3Hk#>4iVR2)$Q z!4V+(gRG@NJIIurz2Je{vo~$e-h}tAdJY-~Qor5b$1nK6)o1GGsHT87qM}gG;H3Uu z$gaRG#doIOwF*8o1jKoKLwo%Wr~=_&rf{be5TI%RJ{QW}{tPIvAFQz@Y0zU(SJ282 zlr%yB4tgpJxnQ{NFNErbLJ?9x4T5a|tu&}wA$O)7QVLa%sC{$K_WTkoB}K6dSKu9j z5oC*B2g(w{2#mP~F)H|2(Q2o}oB+sieBh81Plpm5CxQpaxu_g2&P>u@0oGZngs7l)ZZhuX}n7FzrPjg5^#-=F&c?z3vk==n4-81oF^ zTAt0|8O-s57Yq8u!g$E(t#oK?Y}=N%ZOhZoP@X&s%F{1Up1eSLp@5YCj*st-NAnz9 z&f;Xo!Tey4<;w_U5i!6F9msMX+qvy@8rwyq3`axOteDu-6AlM@?CgYd4)f#$PcRQ> zIMQYbM$YUgiV+*+#8w*=(bAX5A$fUnZZhcShwY0jOlf&*pX3eJuk_aR&2}#ENftOy zwH-iYY&aN<#=(o-?~x9cCVc?B2OAMUsg^MHI z2ycUFW#$OV=LheiIA4oRDNT`sq^x5S7dr-ZL8R&d0*aTwQbQ^3xlC9?hh=|8FZ(C# zvJX88TE)(_H9KMj%V<#HM#VK7tNZ?%J^dk@Z3k}=$A$4VWz1&bLtG9$iFrw*iD2QW zqlk2rdI<#E78N-rW0s&<_V$1csJIp^6q@G>_c{+WM+sUi0yf?&yq2YIP1O0*!R?Gf z#!_X77|L)LRsjXIlvb5*Nr&BCgge zJbLPnDb21yVNm7eDS7wgzLt6ij1*|8 zfA-c8(6#j2iFbt6LH7Ih5*Xc7{<@?X-R}tdeS7KEL}f@6wK7PbH|cBe@LJ2g0~CmC zUU+zg2>$EouO+^kT7@B*q)d>Bf*Mn=1AT+XCirWLu-mP|09Y&V>m-b+?&2fp#8o;J zCf;FZ)-VI?!r^jgjg@fm7J{xyhyxAm=xI*lPazy^>v|DbOl$B2fV8q}mmbuQmGz>4 z$+r+Qmquk`HUXvznug@2aYT9Oz#702FT{ygun_N!CMa(MhSrbZ>L0~<10XLzbO`9$ zzTF$7_y0AN0XPfJr5Q-WM zsWcf<_|B|D6%x+Fo~=Rgz4iHON}1LpH5pc-Hi~<=J;Qwts++nTSckAAln8Zm*z_p1 zdE7x^Oe@g83$=v;lmfcIqJp)LP9aL^I>paXu|ZPwK#0`^b{ViTQA~$wg9|ra-k(4< zkY4ov7z-{B#e3KUx)5p|fyS~S%Yc>%0Ox;H~c)XX_7HOPz=62f=ha3{}TLb@xJ|@_k6j7NpP?e}O3UP<@!~eF6R1K7rq;AEHty3Cj0D zJcV*J`gaK9EMvO;C-_}}y=WR^xUSWPyWIkmZhEz-mg>B)^n_YX{x;Nt=q!Uv$gV81DFX}=nj%4v3Yr1U zIE$KvN||FoK#~wfghvS`gq+C6qK#y9t4*#7X`*uUUbN_qXyI2S)~&RK{&y?>idWuJ z;f8GVmCmKwvP|i0dr*w9Xp6QV&KM(v13{&8ReEbxXvd2SbCWm>LrhaP=O&EaixO(n zSq0y@WhQ`R$}s6O)QE3`F16rb2HP~`&fJ3)ZJrVd-6=a9mKFgu1g?5z`CcRu(m_Q9 
zoaRW%D8DM`9H^U7@K@w-Uj3P$_=dkT{Fw()acgL9P=?N{GDH*FmFY8ZhWtVs<$Ek> z2_4|ae09#Y&;d8VKD8V5T!29X8Zihq;ApL)=iP?sE} zqX$6z*@R)D%#_YCFc==Ei-j;#r%`64k?1G~BP($`#P@bEjO}9_3bxi^jBOn<#xwUA z<4}2e1layix6xHKVj(IS&rDD6IXrfWnXa>0(^DW#1I>|8BiQ2>#$+%YA9@w&PJ||x zeX?yg)2aIvN){Q4PAgzloVx_wfxrZ+tRdFMsV85rosRyWe!Vu0&*QJxQ z&WP7*XOV+R&ft3nKPS#kpa{86PfyI8pLo3zitUSIY^&WZ5JS?53W*CRG61htXd)xo-HsbI+vjEg^n$lQBCZWfB(Dd&v3PxeJ1>KP{&zu^0xO@6?F^#Mh{Ba!*G#O8!O0O1%sc|N21zh#B?zs20ym(Q_wOy^ zURJ7q5fXc2=P}k!6ax6g5wv~HEL6I-Zh3#mMho#8m~-^-ndr%;{adidgttVWwhhnl z`FUPM-(wGV3~sa z{jY$vBhnHYBqU+%X&fr1AQ5@Qs{X@tld33dY_mM4uGP=bGaYfoj`u=3SFGalK!T5 ziXjzkkKT3|X%95_XF&^s9#!??H_QO(Ut;^m@SJY$3&jC!C9O^awy%T;g6A!XR1(ov z4#zJdzgq_d5>a88U!>IAYpDr!m|N{IPN2oWSj0%n3N1Mj8mIeClsk`0-V85OWXJQ;Q1E?ZnH!bLfK z&5}?B*$Z{s4MsIv*?=EV8KIY=*seJ&!@YzopXeHU5bC8oWiL@5>J0S~%?NslW=U3z z#{PPVF}hbTIg0pGz4S{(Mrwt39t1#d?<-e9OCM#Wh#GFogvWVPZxhHxsY<)*+7)os zih&jq1IYxqc01`2yo=6-Ejx^Z+=oB*e-GBFMPv;rgl~uq)!&r8<5Tm)-oz(UNvDER zOtKE&rq~Xz`B){H3doue1o)QS8+K$n5mz56PNXwOloN6MRH(>EtjL)s5oS+9=t+=A z3H8T>{F8uyr=2sO4kxiqMbt@#g+$=J)6Us*`p&*x)NkblL;0P-oqZebrhORbX_2&o zsexDCB|DDF|Nbo z1INtfWJiLw@!*^f+7#^)NP(CZG2*2yt$iFY)uMG}>9G1MYP4Yg%ZSgv@gl((;l z7hvBR18rqkwjh(MR2TvO6GKAnG?M3fZpo;$WwAZsxPLkk6uFSk%)ZG<4ZCa9fhw(* zun=J2A~rX+Fda(3gxLh|6?|9~H?tMF*mNF$%GsO}4iuvwwj3*)&ZnN@I=4j!teKW* ztoPn!jLB+tSrFAycG)?-%OnY6klN~kU3SjyvfwRt*)$7f)KZ4Z+{Z3kY}s27iks0@ z@ztUboV?(xt*IomB2BGB;@w#={|{AB{|a9rX*V?i{>PxH!aI0CHT_cb%6fc%g_}0-Smqyc0&j|IRy+hyM}p{!d=$ z&Td?Lva=MwCt~d?-*53EHX#2O?x>dtSXx;rO?!VF`90(PU8ejwUIL$Yzt5llD=$C6 z%TMwmM10DV!IgdsCBK7LUuKst;3jd&*whA&5ptl*he1;-N}Y;{bSyw>o8NSTISjW3 zBgwr_L@;5c9S|ndutr`%72~OAz2A^;1T{VGJ&s%NEnaT&vdhZ|FJI*4dwKbbynKn5 z+q}HZizp8KDDN)uLd#k2uk-RVywLNM_hY==;boSW%e>s;<@>u~>XYlc##O1?W;5$K+Os-e28D6#U{#?5DAucac+hzOIM42O{_{`6>0RCw8hGVK)P%29qaD@s8u5bZLZKvWx zTz-{ut&Cdsh+D-*KBMq?9X7UM=1gq32#h_5gpVGF73Ews<$ROm{^O*pml_QRh8Kyr8|&*{78tW>q!j~5%$0m6fwKWta+k2I5dP@R zKW8$UiJlx_vioCy(vUX;Zb*pESX8h{~MG(8Hmgl-j=PV@rL;M{X^ 
zy{H@+3X|KH;6EmS>%Ycw^FH9E!OQ={3s0{QPQWALk9lkraAN|vB82O)_C5wNT*2s- zKYnAODyv5p00at(@{FlNM+^vgh-XqQ&McxZt+n$k=>`MYcw=Y}9P6W2Y}!zO&ouHN zi>h=TH{4JR8w=A!RL)>7@;l)@hVK+Z041S=#!a_7%Qy8ToT<8SP=jp;?l1Vy+Y^~Y zEernhn~MKj%Q1YWoA^@dyYcQ-e85+gjkzG>zc(Z>+hGh3z**758}E{#_r8f1NtdCY zS7KbRD}3-dT$+U|g!JONQaj=gWoelgp3A))89q;dbx#)?yqfq+!^yo9v2z65K>C&p zIfRHz(=dwTDBMZfaY@zbIU}X|hL*`X`8P?!;c)Z19Rvc@m(O~9vx1NP;}G)Ce-4*0;v>Qh8l~hz zaoq>tmm~jn5|P~pT`G(v5!=1T?L`#zyxZsYb)h%73^n)%9LBJo1W z!x;?QwJn480Qv|T9RN>fN7Sv|TX3{mUIFo5UGs{Yn*fDj1PG7cf|(M?^__}#4uRvG z_N>aKf)IG3f)tcY{}D>#t9G+0aMjfAgSj(ja~fzYT2{(vU*LP4Xjv&>Tuzj*(7KtE(M-=Nf-G|^`4Wo0{Q1lZ9xdGw_ z^@O$~LF&}Ug5wBQseY-r^s3+D$wS7{*bb_|<3PT^D!EyL-~r#J2Q);K6Pe?=sI>BE zexXWHZ-fH|4hnFRx1f0dya`xUOX1QqQs_Z}lC4r&6I_rnTp$#!qLzyhLnGk_(t-o? z1T!|@nO4}O;fdLLVhT3}!eO4({ZQJ@B7DLmpfzfuSUN|b!zt?&cWRK#7`vvG>s2s} z)ObSi#H&JO2Q#8NY1dkDV+~xd+VxFXRimzjQpw#YF^twLJJ3M9KH*&5#M{YNkQ2e$ zDihA-YORXLRZj{s&nH!~i=|sQ^z~N2XtgdsTidu^Rr3nl5F~h1hHuo_EI|{+j)lfQ zwDd2$z_|o~vF;G{@uf9zbGShShUTtXHO3-GdUEVmL}#%bL)%44h_pxYe&qIOz0@M0 zgv7qWu-syhCbAkiGNFby`&=O_f))^;Ys0*m-LgGGPusTXfg# z#u3dVBMwWE;q0n<9SY3OCw77O+QAZ4dq7SJ75l7pSA=I#&y#V~6V_(^|K;R3c5<8# zGdU9TS7rd=emhOhCCWR@@oWGe7UC5bfJ68Jp;MHCk~JI=6=w#NJ)Cj*3Y1T)Uj*)M z$=yNlXjk|ILXuUgl}T&790V-WFwRaKHj;M0bV@ZxsVPU^Gme`z*;FR%k@xMU^wNE% zA=|=k4{k%VnB;q`W%vHO+L7nucI4f+RCq_mX21J}Ry$I3A8|lj1HC-~fXTR$5Z2uk zL4=xFIo$1R(m|O+6>$P)rJIkF7so9Sg40ua{(0h*f)yZnl)5-`l?}2Kp0akWbBIFh zq?tL2(K(+)t!?R)Ad#L+qWbAt1{AJXgl>0D`K`HP&P|>BC@?%W``s6ujPhdmEVlT) zun~pf2^R*=4J@FF67zvei46uM*F^-#!1uJM z!eNksX&@6VO}Fh;?UKQeB!EcuNp**TQFm=vg0W&&{BzKZL>C0t7k0OCIDiwT3~q4^ z>|1w7B+{S&!M?_!18;;PA1A)FvUa=>Pmky0Y-h(+YF9vdRqD9%Lc)kOxAe z+gjqbW@zm?@P>xqkx=#=h6*NPGLel0wo&?aI+@rWh9_onCsTVreRuXcCex}*hG$;w zY7$O%wP`GchwgLLDx48m26aqEoP}Tt3Q;MmDk#R-Ewplc7sfM)sI>#$fs*IFfc`eS zPzbY*XB1!c$MMknRbEcu0&eSLybqX5Wb^(5%dN}Ov3W}%Pr$xqM3FCp{beJ%q>;Zb4manuvFB)!Gn=<<%%D6`O!ule*2m~~*)rQX9x>$vB8Q>R+?*eSEgu451p=GG2? 
zbLymj3XJVEHm^snvPU=1A|*y3Q-nB_4lgeoHr|_j%ie~#X~IZD?TLSE`}^tQ6I zrLVR?(VSo5n&KaZ zc4>5MTh}dRG{4Q!W2qcHqm2&-lu(8G<7im6Ge$dzyRD!rh9}B%|%Xd`0jBWDq@VmC08KM-;9^ud` zyrJ12#Uq*r+p*SNDBy6SyexLAm*b_2mpm`sycBrp;iZ?CK3@8H8Q^7*mjk>E^D@NC zL0ll}Bk;gE1t8rT`LWr%ybK1vyjouVU9Rv)kvIX^(CHrnbwKh$YnZ*`t>7(Om~}eA zlZqeN0KbeIEKitdQ0T|yD|hr{4!`Gk25+WE{J^OgZmc|Iu)%Hx;)Kg5rSj`~u1{Up<}0al{Th9#l#u&LB44VO*MhKzyj3 zvjXAB@^S#24KgLT1yPbm*>niS63|K-%%pm|^ZCMHp^zUDgoRSXO*jMqqLOU(csrG4 zw+d#LUmG7*N?BB0Tk5uitU8Ukc9**N@M>v0bZEa%(1*vD{uJTQFb$!4~rN!^A`X}b~Z{+^Ux7RDhM7WaL!0_FyH^h YMEGB}<@Xm8FZVw+^u PY_SSIZE_T_MAX - s) s = OVERFLOW_ERROR; \ + else s += term; \ + } \ +} + + +typedef enum {NeedlemanWunschSmithWaterman, + Gotoh, + WatermanSmithBeyer, + Unknown} Algorithm; + +typedef enum {Global, Local} Mode; + +typedef struct { + unsigned char trace : 5; + unsigned char path : 3; +} Trace; + +typedef struct { + unsigned char Ix : 4; + unsigned char Iy : 4; +} TraceGapsGotoh; + +typedef struct { + int* MIx; + int* IyIx; + int* MIy; + int* IxIy; +} TraceGapsWatermanSmithBeyer; + +typedef struct { + PyObject_HEAD + Trace** M; + union { TraceGapsGotoh** gotoh; + TraceGapsWatermanSmithBeyer** waterman_smith_beyer; } gaps; + int nA; + int nB; + int iA; + int iB; + Mode mode; + Algorithm algorithm; + Py_ssize_t length; + unsigned char strand; +} PathGenerator; + +static PyObject* +PathGenerator_create_path(PathGenerator* self, int i, int j) { + PyObject* tuple; + PyObject* row; + PyObject* value; + int path; + const int ii = i; + const int jj = j; + int n = 1; + int direction = 0; + Trace** M = self->M; + const unsigned char strand = self->strand; + + while (1) { + path = M[i][j].path; + if (!path) break; + if (path != direction) { + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + } + } + + i = ii; + j = jj; + direction = 0; + tuple = PyTuple_New(n); + if (!tuple) return NULL; + + n = 0; + switch (strand) { + case '+': + 
while (1) { + path = M[i][j].path; + if (path != direction) { + row = PyTuple_New(2); + if (!row) break; + value = PyLong_FromLong(i); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 0, value); + value = PyLong_FromLong(j); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 1, value); + PyTuple_SET_ITEM(tuple, n, row); + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + default: return tuple; + } + } + break; + case '-': { + const int nB = self->nB; + while (1) { + path = M[i][j].path; + if (path != direction) { + row = PyTuple_New(2); + if (!row) break; + value = PyLong_FromLong(i); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 0, value); + value = PyLong_FromLong(nB-j); + if (!value) { + Py_DECREF(row); /* all references were stolen */ + break; + } + PyTuple_SET_ITEM(row, 1, value); + PyTuple_SET_ITEM(tuple, n, row); + n++; + direction = path; + } + switch (path) { + case HORIZONTAL: j++; break; + case VERTICAL: i++; break; + case DIAGONAL: i++; j++; break; + default: return tuple; + } + } + break; + } + } + Py_DECREF(tuple); /* all references were stolen */ + return PyErr_NoMemory(); +} + +static Py_ssize_t +PathGenerator_needlemanwunsch_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t temp; + Py_ssize_t* counts; + counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!counts) goto exit; + counts[0] = 1; + for (j = 1; j <= nB; j++) { + trace = M[0][j].trace; + count = 0; + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + counts[j] = count; + } + for (i = 1; i <= nA; i++) { + trace = M[i][0].trace; + count = 0; + if (trace & VERTICAL) 
SAFE_ADD(counts[0], count); + temp = counts[0]; + counts[0] = count; + for (j = 1; j <= nB; j++) { + trace = M[i][j].trace; + count = 0; + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + if (trace & VERTICAL) SAFE_ADD(counts[j], count); + if (trace & DIAGONAL) SAFE_ADD(temp, count); + temp = counts[j]; + counts[j] = count; + } + } + PyMem_Free(counts); +exit: + return count; +} + +static Py_ssize_t +PathGenerator_smithwaterman_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t temp; + Py_ssize_t* counts; + counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!counts) goto exit; + counts[0] = 1; + for (j = 1; j <= nB; j++) counts[j] = 1; + for (i = 1; i <= nA; i++) { + temp = counts[0]; + counts[0] = 1; + for (j = 1; j <= nB; j++) { + trace = M[i][j].trace; + count = 0; + if (trace & DIAGONAL) SAFE_ADD(temp, count); + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + if (trace & HORIZONTAL) SAFE_ADD(counts[j-1], count); + if (trace & VERTICAL) SAFE_ADD(counts[j], count); + temp = counts[j]; + if (count == 0 && (trace & STARTPOINT)) count = 1; + counts[j] = count; + } + } + count = total; + PyMem_Free(counts); +exit: + return count; +} + +static Py_ssize_t +PathGenerator_gotoh_global_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t term; + Py_ssize_t M_temp; + Py_ssize_t Ix_temp; + Py_ssize_t Iy_temp; + Py_ssize_t* M_counts = NULL; + Py_ssize_t* Ix_counts = NULL; + Py_ssize_t* Iy_counts = NULL; + M_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_counts) goto exit; + Ix_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_counts) goto exit; + Iy_counts = 
PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_counts) goto exit; + M_counts[0] = 1; + Ix_counts[0] = 0; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + M_counts[j] = 0; + Ix_counts[j] = 0; + Iy_counts[j] = 1; + } + for (i = 1; i <= nA; i++) { + M_temp = M_counts[0]; + M_counts[0] = 0; + Ix_temp = Ix_counts[0]; + Ix_counts[0] = 1; + Iy_temp = Iy_counts[0]; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_temp, count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_temp, count); + M_temp = M_counts[j]; + M_counts[j] = count; + count = 0; + trace = gaps[i][j].Ix; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j], count); + Ix_temp = Ix_counts[j]; + Ix_counts[j] = count; + count = 0; + trace = gaps[i][j].Iy; + if (trace & M_MATRIX) SAFE_ADD(M_counts[j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j-1], count); + Iy_temp = Iy_counts[j]; + Iy_counts[j] = count; + } + } + count = 0; + if (M[nA][nB].trace) SAFE_ADD(M_counts[nB], count); + if (gaps[nA][nB].Ix) SAFE_ADD(Ix_counts[nB], count); + if (gaps[nA][nB].Iy) SAFE_ADD(Iy_counts[nB], count); +exit: + if (M_counts) PyMem_Free(M_counts); + if (Ix_counts) PyMem_Free(Ix_counts); + if (Iy_counts) PyMem_Free(Iy_counts); + return count; +} + +static Py_ssize_t +PathGenerator_gotoh_local_length(PathGenerator* self) +{ + int i; + int j; + int trace; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsGotoh** gaps = self->gaps.gotoh; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t M_temp; + Py_ssize_t Ix_temp; + Py_ssize_t Iy_temp; + Py_ssize_t* M_counts = NULL; + Py_ssize_t* Ix_counts = NULL; + Py_ssize_t* Iy_counts = NULL; + M_counts = 
PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_counts) goto exit; + Ix_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_counts) goto exit; + Iy_counts = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_counts) goto exit; + M_counts[0] = 1; + Ix_counts[0] = 0; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + M_counts[j] = 1; + Ix_counts[j] = 0; + Iy_counts[j] = 0; + } + for (i = 1; i <= nA; i++) { + M_temp = M_counts[0]; + M_counts[0] = 1; + Ix_temp = Ix_counts[0]; + Ix_counts[0] = 0; + Iy_temp = Iy_counts[0]; + Iy_counts[0] = 0; + for (j = 1; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_temp, count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_temp, count); + if (count == 0 && (trace & STARTPOINT)) count = 1; + M_temp = M_counts[j]; + M_counts[j] = count; + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + count = 0; + trace = gaps[i][j].Ix; + if (trace & M_MATRIX) SAFE_ADD(M_temp, count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j], count); + Ix_temp = Ix_counts[j]; + Ix_counts[j] = count; + count = 0; + trace = gaps[i][j].Iy; + if (trace & M_MATRIX) SAFE_ADD(M_counts[j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_counts[j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_counts[j-1], count); + Iy_temp = Iy_counts[j]; + Iy_counts[j] = count; + } + } + count = total; +exit: + if (M_counts) PyMem_Free(M_counts); + if (Ix_counts) PyMem_Free(Ix_counts); + if (Iy_counts) PyMem_Free(Iy_counts); + return count; +} + +static Py_ssize_t +PathGenerator_waterman_smith_beyer_global_length(PathGenerator* self) +{ + int i; + int j; + int trace; + int* p; + int gap; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t term; + Py_ssize_t** M_count = NULL; + 
Py_ssize_t** Ix_count = NULL; + Py_ssize_t** Iy_count = NULL; + M_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!M_count) goto exit; + Ix_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Ix_count) goto exit; + Iy_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Iy_count) goto exit; + for (i = 0; i <= nA; i++) { + M_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_count[i]) goto exit; + Ix_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_count[i]) goto exit; + Iy_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_count[i]) goto exit; + } + for (i = 0; i <= nA; i++) { + for (j = 0; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_count[i-1][j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_count[i-1][j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_count[i-1][j-1], count); + if (count == 0) count = 1; /* happens at M[0][0] only */ + M_count[i][j] = count; + count = 0; + p = gaps[i][j].MIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i-gap][j], count); + p++; + } + } + p = gaps[i][j].IyIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Iy_count[i-gap][j], count); + p++; + } + } + Ix_count[i][j] = count; + count = 0; + p = gaps[i][j].MIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i][j-gap], count); + p++; + } + } + p = gaps[i][j].IxIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Ix_count[i][j-gap], count); + p++; + } + } + Iy_count[i][j] = count; + } + } + count = 0; + if (M[nA][nB].trace) + SAFE_ADD(M_count[nA][nB], count); + if (gaps[nA][nB].MIx[0] || gaps[nA][nB].IyIx[0]) + SAFE_ADD(Ix_count[nA][nB], count); + if (gaps[nA][nB].MIy[0] || gaps[nA][nB].IxIy[0]) + SAFE_ADD(Iy_count[nA][nB], count); +exit: + if (M_count) { + if (Ix_count) { + if (Iy_count) { + for (i = 0; i <= nA; i++) { + if (!M_count[i]) break; + PyMem_Free(M_count[i]); + if (!Ix_count[i]) 
break; + PyMem_Free(Ix_count[i]); + if (!Iy_count[i]) break; + PyMem_Free(Iy_count[i]); + } + PyMem_Free(Iy_count); + } + PyMem_Free(Ix_count); + } + PyMem_Free(M_count); + } + return count; +} + +static Py_ssize_t +PathGenerator_waterman_smith_beyer_local_length(PathGenerator* self) +{ + int i; + int j; + int trace; + int* p; + int gap; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + Py_ssize_t term; + Py_ssize_t count = MEMORY_ERROR; + Py_ssize_t total = 0; + Py_ssize_t** M_count = NULL; + Py_ssize_t** Ix_count = NULL; + Py_ssize_t** Iy_count = NULL; + M_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!M_count) goto exit; + Ix_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Ix_count) goto exit; + Iy_count = PyMem_Malloc((nA+1)*sizeof(Py_ssize_t*)); + if (!Iy_count) goto exit; + for (i = 0; i <= nA; i++) { + M_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!M_count[i]) goto exit; + Ix_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Ix_count[i]) goto exit; + Iy_count[i] = PyMem_Malloc((nB+1)*sizeof(Py_ssize_t)); + if (!Iy_count[i]) goto exit; + } + for (i = 0; i <= nA; i++) { + for (j = 0; j <= nB; j++) { + count = 0; + trace = M[i][j].trace; + if (trace & M_MATRIX) SAFE_ADD(M_count[i-1][j-1], count); + if (trace & Ix_MATRIX) SAFE_ADD(Ix_count[i-1][j-1], count); + if (trace & Iy_MATRIX) SAFE_ADD(Iy_count[i-1][j-1], count); + if (count == 0 && (trace & STARTPOINT)) count = 1; + M_count[i][j] = count; + if (M[i][j].trace & ENDPOINT) SAFE_ADD(count, total); + count = 0; + p = gaps[i][j].MIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(M_count[i-gap][j], count); + p++; + } + } + p = gaps[i][j].IyIx; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Iy_count[i-gap][j], count); + p++; + } + } + Ix_count[i][j] = count; + count = 0; + p = gaps[i][j].MIy; + if (p) { + while (1) { + gap = *p; + 
if (!gap) break; + SAFE_ADD(M_count[i][j-gap], count); + p++; + } + } + p = gaps[i][j].IxIy; + if (p) { + while (1) { + gap = *p; + if (!gap) break; + SAFE_ADD(Ix_count[i][j-gap], count); + p++; + } + } + Iy_count[i][j] = count; + } + } + count = total; +exit: + if (M_count) { + if (Ix_count) { + if (Iy_count) { + for (i = 0; i <= nA; i++) { + if (!M_count[i]) break; + PyMem_Free(M_count[i]); + if (!Ix_count[i]) break; + PyMem_Free(Ix_count[i]); + if (!Iy_count[i]) break; + PyMem_Free(Iy_count[i]); + } + PyMem_Free(Iy_count); + } + PyMem_Free(Ix_count); + } + PyMem_Free(M_count); + } + return count; +} + +static Py_ssize_t PathGenerator_length(PathGenerator* self) { + Py_ssize_t length = self->length; + if (length == 0) { + switch (self->algorithm) { + case NeedlemanWunschSmithWaterman: + switch (self->mode) { + case Global: + length = PathGenerator_needlemanwunsch_length(self); + break; + case Local: + length = PathGenerator_smithwaterman_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. + */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case Gotoh: + switch (self->mode) { + case Global: + length = PathGenerator_gotoh_global_length(self); + break; + case Local: + length = PathGenerator_gotoh_local_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. + */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case WatermanSmithBeyer: + switch (self->mode) { + case Global: + length = PathGenerator_waterman_smith_beyer_global_length(self); + break; + case Local: + length = PathGenerator_waterman_smith_beyer_local_length(self); + break; + default: + /* should not happen, but some compilers complain that + * that length can be used uninitialized. 
+ */ + PyErr_SetString(PyExc_RuntimeError, "Unknown mode"); + return -1; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown algorithm"); + return -1; + } + self->length = length; + } + switch (length) { + case OVERFLOW_ERROR: + PyErr_Format(PyExc_OverflowError, + "number of optimal alignments is larger than %zd", + PY_SSIZE_T_MAX); + break; + case MEMORY_ERROR: + PyErr_SetNone(PyExc_MemoryError); + break; + default: + break; + } + return length; +} + +static void +PathGenerator_dealloc(PathGenerator* self) +{ + int i; + const int nA = self->nA; + const Algorithm algorithm = self->algorithm; + Trace** M = self->M; + if (M) { + for (i = 0; i <= nA; i++) { + if (!M[i]) break; + PyMem_Free(M[i]); + } + PyMem_Free(M); + } + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + break; + case Gotoh: { + TraceGapsGotoh** gaps = self->gaps.gotoh; + if (gaps) { + for (i = 0; i <= nA; i++) { + if (!gaps[i]) break; + PyMem_Free(gaps[i]); + } + PyMem_Free(gaps); + } + break; + } + case WatermanSmithBeyer: { + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + if (gaps) { + int j; + const int nB = self->nB; + int* trace; + for (i = 0; i <= nA; i++) { + if (!gaps[i]) break; + for (j = 0; j <= nB; j++) { + trace = gaps[i][j].MIx; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].IyIx; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].MIy; + if (trace) PyMem_Free(trace); + trace = gaps[i][j].IxIy; + if (trace) PyMem_Free(trace); + } + PyMem_Free(gaps[i]); + } + PyMem_Free(gaps); + } + break; + } + case Unknown: + default: + PyErr_WriteUnraisable((PyObject*)self); + break; + } + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject* PathGenerator_next_needlemanwunsch(PathGenerator* self) +{ + int i = 0; + int j = 0; + int path; + int trace = 0; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + + path = M[i][j].path; + if (path == DONE) return NULL; + if (path == 0) { + /* 
/* Produce the next path for the NeedlemanWunschSmithWaterman algorithm in
 * Local mode (dispatched to from PathGenerator_next).
 *
 * State kept between calls:
 *   - M[0][0].path: DONE or NONE means there is nothing (more) to return;
 *   - self->iA, self->iB: the cell at which the previously returned path
 *     started;
 *   - M[i][j].path: the step taken from cell [i][j] along the current path.
 *
 * Returns the result of PathGenerator_create_path(self, i, j) for the start
 * cell of the new path, NULL without an exception when the generator is
 * exhausted, or NULL with RuntimeError set if the trace matrix contains an
 * unexpected value.
 */
static PyObject* PathGenerator_next_smithwaterman(PathGenerator* self)
{
    int trace = 0;
    int i = self->iA;
    int j = self->iB;
    const int nA = self->nA;
    const int nB = self->nB;
    Trace** M = self->M;
    int path = M[0][0].path;

    if (path == DONE || path == NONE) return NULL;

    path = M[i][j].path;
    if (path) {
        /* We already have a path. Prune the path to see if there are
         * any alternative paths. */
        while (1) {
            if (path == HORIZONTAL) {
                /* step forward along the stored path, then check whether
                 * the cell reached can also be entered in another way */
                trace = M[i][++j].trace;
                if (trace & VERTICAL) {
                    M[--i][j].path = VERTICAL;
                    break;
                }
                else if (trace & DIAGONAL) {
                    M[--i][--j].path = DIAGONAL;
                    break;
                }
            }
            else if (path == VERTICAL) {
                trace = M[++i][j].trace;
                if (trace & DIAGONAL) {
                    M[--i][--j].path = DIAGONAL;
                    break;
                }
            }
            else /* DIAGONAL */ {
                i++;
                j++;
            }
            path = M[i][j].path;
            /* path == 0: walked to the end of the stored path without
             * finding an alternative */
            if (!path) break;
        }
    }

    if (path) {
        /* an alternative step was found; resume the traceback from it */
        trace = M[i][j].trace;
    } else {
        /* Find a suitable end point for a path.
         * Only allow end points ending at the M matrix. */
        while (1) {
            /* advance to the next cell in row-major order */
            if (j < nB) j++;
            else if (i < nA) {
                i++;
                j = 0;
            }
            else {
                /* we reached the end of the sequences without finding
                 * an alternative path */
                M[0][0].path = DONE;
                return NULL;
            }
            trace = M[i][j].trace;
            if (trace & ENDPOINT) {
                trace &= DIAGONAL; /* exclude paths ending in a gap */
                break;
            }
        }
        /* the end point has no outgoing step */
        M[i][j].path = 0;
    }

    /* Follow the traceback until we reach the origin. */
    while (1) {
        /* record in the predecessor cell the step that leads to [i][j] */
        if (trace & HORIZONTAL) M[i][--j].path = HORIZONTAL;
        else if (trace & VERTICAL) M[--i][j].path = VERTICAL;
        else if (trace & DIAGONAL) M[--i][--j].path = DIAGONAL;
        else if (trace & STARTPOINT) {
            /* remember where this path starts for the next call */
            self->iA = i;
            self->iB = j;
            return PathGenerator_create_path(self, i, j);
        }
        else {
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected trace in PathGenerator_next_smithwaterman");
            return NULL;
        }
        trace = M[i][j].trace;
    }
}
/* Produce the next path for the Gotoh algorithm in Local mode (dispatched
 * to from PathGenerator_next).
 *
 * Three trace sources are used: M[i][j].trace, gaps[i][j].Ix and
 * gaps[i][j].Iy; the variable m records which of them (M_MATRIX, Ix_MATRIX
 * or Iy_MATRIX) the traceback is currently in.
 *
 * State kept between calls:
 *   - M[0][0].path: DONE means there is nothing more to return;
 *   - self->iA, self->iB: the cell at which the previously returned path
 *     started;
 *   - M[i][j].path: the step taken from cell [i][j] along the current path.
 *
 * Returns the result of PathGenerator_create_path(self, i, j) for the start
 * cell of the new path, NULL without an exception when the generator is
 * exhausted, or NULL with RuntimeError set if a trace value is unexpected.
 */
static PyObject* PathGenerator_next_gotoh_local(PathGenerator* self)
{
    int trace = 0;
    int i;
    int j;
    int m = M_MATRIX;
    int iA = self->iA;
    int iB = self->iB;
    const int nA = self->nA;
    const int nB = self->nB;
    Trace** M = self->M;
    TraceGapsGotoh** gaps = self->gaps.gotoh;
    int path = M[0][0].path;

    if (path == DONE) return NULL;

    path = M[iA][iB].path;

    if (path) {
        i = iA;
        j = iB;
        while (1) {
            /* We already have a path. Prune the path to see if there are
             * any alternative paths. */
            path = M[i][j].path;
            if (path == 0) {
                /* walked to the end of the stored path without finding an
                 * alternative; continue the end point search from here */
                m = M_MATRIX;
                iA = i;
                iB = j;
                break;
            }
            /* step forward along the stored path and fetch the trace of
             * the cell reached, from the matrix that the step ends in */
            switch (path) {
                case HORIZONTAL: trace = gaps[i][++j].Iy; break;
                case VERTICAL: trace = gaps[++i][j].Ix; break;
                case DIAGONAL: trace = M[++i][++j].trace; break;
            }
            /* Look for the next source matrix after m (order M, Ix, Iy).
             * The cases without a break fall through to the next one. */
            switch (m) {
                case M_MATRIX:
                    if (trace & Ix_MATRIX) {
                        m = Ix_MATRIX;
                        break;
                    }
                case Ix_MATRIX:
                    if (trace & Iy_MATRIX) {
                        m = Iy_MATRIX;
                        break;
                    }
                case Iy_MATRIX:
                default:
                    /* no alternative at this cell; set m to the matrix
                     * that the step just taken ends in and keep pruning */
                    switch (path) {
                        case HORIZONTAL: m = Iy_MATRIX; break;
                        case VERTICAL: m = Ix_MATRIX; break;
                        case DIAGONAL: m = M_MATRIX; break;
                    }
                    continue;
            }
            /* alternative found: undo the forward step and keep the stored
             * step for this cell */
            switch (path) {
                case HORIZONTAL: j--; break;
                case VERTICAL: i--; break;
                case DIAGONAL: i--; j--; break;
            }
            M[i][j].path = path;
            break;
        }
    }

    if (path == 0) {
        /* Find the end point for a new path. */
        while (1) {
            /* advance to the next cell in row-major order */
            if (iB < nB) iB++;
            else if (iA < nA) {
                iA++;
                iB = 0;
            }
            else {
                /* we reached the end of the alignment without finding
                 * an alternative path */
                M[0][0].path = DONE;
                return NULL;
            }
            if (M[iA][iB].trace & ENDPOINT) {
                /* the end point has no outgoing step */
                M[iA][iB].path = 0;
                break;
            }
        }
        m = M_MATRIX;
        i = iA;
        j = iB;
    }

    /* Follow the traceback until a start point is reached. */
    while (1) {
        switch (m) {
            case M_MATRIX: trace = M[i][j].trace; break;
            case Ix_MATRIX: trace = gaps[i][j].Ix; break;
            case Iy_MATRIX: trace = gaps[i][j].Iy; break;
        }
        if (trace == STARTPOINT) {
            /* remember where this path starts for the next call */
            self->iA = i;
            self->iB = j;
            return PathGenerator_create_path(self, i, j);
        }
        /* move to the predecessor cell; the current matrix determines the
         * direction of the step */
        switch (m) {
            case M_MATRIX:
                path = DIAGONAL;
                i--;
                j--;
                break;
            case Ix_MATRIX:
                path = VERTICAL;
                i--;
                break;
            case Iy_MATRIX:
                path = HORIZONTAL;
                j--;
                break;
        }
        /* pick the first source matrix present in the trace */
        if (trace & M_MATRIX) m = M_MATRIX;
        else if (trace & Ix_MATRIX) m = Ix_MATRIX;
        else if (trace & Iy_MATRIX) m = Iy_MATRIX;
        else {
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected trace in PathGenerator_next_gotoh_local");
            return NULL;
        }
        M[i][j].path = path;
    }
    /* not reached: the loop above only exits through return */
    return NULL;
}
*/ + while (1) { + if (!path) { + m <<= 1; + break; + } + switch (path) { + case HORIZONTAL: + iA = i; + iB = j; + while (M[i][iB].path == HORIZONTAL) iB++; + break; + case VERTICAL: + iA = i; + while (M[iA][j].path == VERTICAL) iA++; + iB = j; + break; + case DIAGONAL: + iA = i + 1; + iB = j + 1; + break; + default: + PyErr_SetString(PyExc_RuntimeError, + "Unexpected path in PathGenerator_next_waterman_smith_beyer_global"); + return NULL; + } + if (i == iA) { /* HORIZONTAL */ + gapM = gaps[iA][iB].MIy; + gapXY = gaps[iA][iB].IxIy; + if (m == M_MATRIX) { + gap = iB - j; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + } else if (m == Ix_MATRIX) { + gap = iB - j; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Ix_MATRIX; + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + /* no alternative found; continue pruning */ + m = Iy_MATRIX; + j = iB; + } + else if (j == iB) { /* VERTICAL */ + gapM = gaps[iA][iB].MIx; + gapXY = gaps[iA][iB].IyIx; + if (m == M_MATRIX) { + gap = iA - i; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + } else if (m == Iy_MATRIX) { + gap = iA - i; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Iy_MATRIX; + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + /* no alternative found; continue pruning */ + m = Ix_MATRIX; + i = iA; + } + else { /* DIAGONAL */ + i = iA - 1; + j = iB - 1; + trace = M[iA][iB].trace; + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Iy_MATRIX: + default: + /* no alternative found; continue pruning */ + m = M_MATRIX; + i = iA; + j = iB; + 
path = M[i][j].path; + continue; + } + /* alternative found; build path until starting point */ + break; + } + path = M[i][j].path; + } + } + + if (!path) { + /* Find a suitable end point for a path. */ + switch (m) { + case M_MATRIX: + if (M[nA][nB].trace) { + /* m = M_MATRIX; */ + break; + } + case Ix_MATRIX: + if (gaps[nA][nB].MIx[0] || gaps[nA][nB].IyIx[0]) { + m = Ix_MATRIX; + break; + } + case Iy_MATRIX: + if (gaps[nA][nB].MIy[0] || gaps[nA][nB].IxIy[0]) { + m = Iy_MATRIX; + break; + } + default: + M[0][0].path = DONE; + return NULL; + } + i = nA; + j = nB; + } + + /* Follow the traceback until we reach the origin. */ + while (1) { + switch (m) { + case M_MATRIX: + trace = M[i][j].trace; + if (trace & M_MATRIX) m = M_MATRIX; + else if (trace & Ix_MATRIX) m = Ix_MATRIX; + else if (trace & Iy_MATRIX) m = Iy_MATRIX; + else return PathGenerator_create_path(self, i, j); + i--; + j--; + M[i][j].path = DIAGONAL; + break; + case Ix_MATRIX: + gap = gaps[i][j].MIx[0]; + if (gap) m = M_MATRIX; + else { + gap = gaps[i][j].IyIx[0]; + m = Iy_MATRIX; + } + iA = i - gap; + while (iA < i) M[--i][j].path = VERTICAL; + M[i][j].path = VERTICAL; + break; + case Iy_MATRIX: + gap = gaps[i][j].MIy[0]; + if (gap) m = M_MATRIX; + else { + gap = gaps[i][j].IxIy[0]; + m = Ix_MATRIX; + } + iB = j - gap; + while (iB < j) M[i][--j].path = HORIZONTAL; + M[i][j].path = HORIZONTAL; + break; + } + } +} + +static PyObject* +PathGenerator_next_waterman_smith_beyer_local(PathGenerator* self) +{ + int i, j, m; + int trace = 0; + int* gapM; + int* gapXY; + + int iA = self->iA; + int iB = self->iB; + const int nA = self->nA; + const int nB = self->nB; + Trace** M = self->M; + TraceGapsWatermanSmithBeyer** gaps = self->gaps.waterman_smith_beyer; + + int gap; + int path = M[0][0].path; + + if (path == DONE) return NULL; + m = 0; + path = M[iA][iB].path; + if (path) { + /* We already have a path. Prune the path to see if there are + * any alternative paths. 
*/ + m = M_MATRIX; + i = iA; + j = iB; + while (1) { + path = M[i][j].path; + switch (path) { + case HORIZONTAL: + iA = i; + iB = j; + while (M[i][iB].path == HORIZONTAL) iB++; + break; + case VERTICAL: + iA = i; + iB = j; + while (M[iA][j].path == VERTICAL) iA++; + break; + case DIAGONAL: + iA = i + 1; + iB = j + 1; + break; + default: + iA = -1; + break; + } + if (iA < 0) { + m = 0; + iA = i; + iB = j; + break; + } + if (i == iA) { /* HORIZONTAL */ + gapM = gaps[iA][iB].MIy; + gapXY = gaps[iA][iB].IxIy; + if (m == M_MATRIX) { + gap = iB - j; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + j = iB - gap; + while (j < iB) M[i][--iB].path = HORIZONTAL; + break; + } + } else if (m == Ix_MATRIX) { + gap = iB - j; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Ix_MATRIX; + j = iB - gap; + M[i][j].path = HORIZONTAL; + while (iB > j) M[i][--iB].path = HORIZONTAL; + break; + } + /* no alternative found; continue pruning */ + m = Iy_MATRIX; + j = iB; + } + else if (j == iB) { /* VERTICAL */ + gapM = gaps[iA][iB].MIx; + gapXY = gaps[iA][iB].IyIx; + if (m == M_MATRIX) { + gap = iA - i; + while (*gapM != gap) gapM++; + gapM++; + gap = *gapM; + if (gap) { + i = iA - gap; + while (i < iA) M[--iA][j].path = VERTICAL; + break; + } + } else if (m == Iy_MATRIX) { + gap = iA - i; + while (*gapXY != gap) gapXY++; + gapXY++; + } + gap = *gapXY; + if (gap) { + m = Iy_MATRIX; + i = iA - gap; + M[i][j].path = VERTICAL; + while (iA > i) M[--iA][j].path = VERTICAL; + break; + } + /* no alternative found; continue pruning */ + m = Ix_MATRIX; + i = iA; + } + else { /* DIAGONAL */ + i = iA - 1; + j = iB - 1; + trace = M[iA][iB].trace; + switch (m) { + case M_MATRIX: + if (trace & Ix_MATRIX) { + m = Ix_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Ix_MATRIX: + if (trace & Iy_MATRIX) { + m = Iy_MATRIX; + M[i][j].path = DIAGONAL; + break; + } + case Iy_MATRIX: + default: + /* no alternative found; continue pruning */ + m = 
M_MATRIX; + i = iA; + j = iB; + continue; + } + /* alternative found; build path until starting point */ + break; + } + } + } + + if (m == 0) { + /* We are at [nA][nB]. Find a suitable end point for a path. */ + while (1) { + if (iB < nB) iB++; + else if (iA < nA) { + iA++; + iB = 0; + } + else { + /* exhausted this generator */ + M[0][0].path = DONE; + return NULL; + } + if (M[iA][iB].trace & ENDPOINT) break; + } + M[iA][iB].path = 0; + m = M_MATRIX; + i = iA; + j = iB; + } + + /* Follow the traceback until we reach the origin. */ + while (1) { + switch (m) { + case Ix_MATRIX: + gapM = gaps[i][j].MIx; + gapXY = gaps[i][j].IyIx; + iB = j; + gap = *gapM; + if (gap) m = M_MATRIX; + else { + gap = *gapXY; + m = Iy_MATRIX; + } + iA = i - gap; + while (i > iA) M[--i][iB].path = VERTICAL; + break; + case Iy_MATRIX: + gapM = gaps[i][j].MIy; + gapXY = gaps[i][j].IxIy; + iA = i; + gap = *gapM; + if (gap) m = M_MATRIX; + else { + gap = *gapXY; + m = Ix_MATRIX; + } + iB = j - gap; + while (j > iB) M[iA][--j].path = HORIZONTAL; + break; + case M_MATRIX: + iA = i-1; + iB = j-1; + trace = M[i][j].trace; + if (trace & M_MATRIX) m = M_MATRIX; + else if (trace & Ix_MATRIX) m = Ix_MATRIX; + else if (trace & Iy_MATRIX) m = Iy_MATRIX; + else if (trace == STARTPOINT) { + self->iA = i; + self->iB = j; + return PathGenerator_create_path(self, i, j); + } + else { + PyErr_SetString(PyExc_RuntimeError, + "Unexpected trace in PathGenerator_next_waterman_smith_beyer_local"); + return NULL; + } + M[iA][iB].path = DIAGONAL; + break; + } + i = iA; + j = iB; + } +} + +static PyObject * +PathGenerator_next(PathGenerator* self) +{ + const Mode mode = self->mode; + const Algorithm algorithm = self->algorithm; + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + return PathGenerator_next_needlemanwunsch(self); + case Local: + return PathGenerator_next_smithwaterman(self); + } + case Gotoh: + switch (mode) { + case Global: + return 
PathGenerator_next_gotoh_global(self); + case Local: + return PathGenerator_next_gotoh_local(self); + } + case WatermanSmithBeyer: + switch (mode) { + case Global: + return PathGenerator_next_waterman_smith_beyer_global(self); + case Local: + return PathGenerator_next_waterman_smith_beyer_local(self); + } + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown algorithm"); + return NULL; + } +} + +static const char PathGenerator_reset__doc__[] = "reset the iterator"; + +static PyObject* +PathGenerator_reset(PathGenerator* self) +{ + switch (self->mode) { + case Local: + self->iA = 0; + self->iB = 0; + case Global: { + Trace** M = self->M; + switch (self->algorithm) { + case NeedlemanWunschSmithWaterman: + case Gotoh: { + if (M[0][0].path != NONE) M[0][0].path = 0; + break; + } + case WatermanSmithBeyer: { + M[0][0].path = 0; + break; + } + case Unknown: + default: + break; + } + } + } + Py_INCREF(Py_None); + return Py_None; +} + +static PyMethodDef PathGenerator_methods[] = { + {"reset", + (PyCFunction)PathGenerator_reset, + METH_NOARGS, + PathGenerator_reset__doc__ + }, + {NULL} /* Sentinel */ +}; + +static PySequenceMethods PathGenerator_as_sequence = { + (lenfunc)PathGenerator_length, /* sq_length */ + NULL, /* sq_concat */ + NULL, /* sq_repeat */ + NULL, /* sq_item */ + NULL, /* sq_ass_item */ + NULL, /* sq_contains */ + NULL, /* sq_inplace_concat */ + NULL, /* sq_inplace_repeat */ +}; + +static PyTypeObject PathGenerator_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "Path generator", /* tp_name */ + sizeof(PathGenerator), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PathGenerator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &PathGenerator_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 
Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)PathGenerator_next, /* tp_iternext */ + PathGenerator_methods, /* tp_methods */ +}; + +typedef struct { + PyObject_HEAD + Mode mode; + Algorithm algorithm; + double match; + double mismatch; + double epsilon; + double target_internal_open_gap_score; + double target_internal_extend_gap_score; + double target_left_open_gap_score; + double target_left_extend_gap_score; + double target_right_open_gap_score; + double target_right_extend_gap_score; + double query_internal_open_gap_score; + double query_internal_extend_gap_score; + double query_left_open_gap_score; + double query_left_extend_gap_score; + double query_right_open_gap_score; + double query_right_extend_gap_score; + PyObject* target_gap_function; + PyObject* query_gap_function; + Py_buffer substitution_matrix; + PyObject* alphabet; + int* mapping; + int wildcard; +} Aligner; + + +static Py_ssize_t +set_alphabet(Aligner* self, PyObject* alphabet) +{ + Py_ssize_t size; + if (alphabet == Py_None) { + if (self->alphabet) { + Py_DECREF(self->alphabet); + self->alphabet = NULL; + } + if (self->mapping) { + PyMem_Free(self->mapping); + self->mapping = NULL; + } + return 0; + } + else if (PyUnicode_Check(alphabet)) { + int* mapping; + int i; + int n; + int kind; + void* characters; + if (PyUnicode_READY(alphabet) == -1) return -1; + size = PyUnicode_GET_LENGTH(alphabet); + if (size == 0) { + PyErr_SetString(PyExc_ValueError, "alphabet has zero length"); + return -1; + } + kind = PyUnicode_KIND(alphabet); + switch (kind) { + case PyUnicode_1BYTE_KIND: { + n = 1 << 8 * sizeof(Py_UCS1); + break; + } + case PyUnicode_2BYTE_KIND: { + n = 1 << 8 * sizeof(Py_UCS2); + break; + } + case PyUnicode_4BYTE_KIND: { + n = 0x110000; /* Maximum code point in Unicode 6.0 + * is 0x10ffff = 1114111 */ + break; + } + case 
PyUnicode_WCHAR_KIND: + default: + PyErr_SetString(PyExc_ValueError, "could not interpret alphabet"); + return -1; + } + characters = PyUnicode_DATA(alphabet); + mapping = PyMem_Malloc(n*sizeof(int)); + if (!mapping) return -1; + for (i = 0; i < n; i++) mapping[i] = MISSING_LETTER; + for (i = 0; i < size; i++) { + Py_UCS4 character = PyUnicode_READ(kind, characters, i); + if (mapping[character] != MISSING_LETTER) { + PyObject* c = PyUnicode_FromKindAndData(kind, &character, 1); + PyErr_Format(PyExc_ValueError, + "alphabet contains '%S' more than once", c); + Py_XDECREF(c); + PyMem_Free(mapping); + return -1; + } + mapping[character] = i; + } + Py_INCREF(alphabet); + if (self->mapping) PyMem_Free(self->mapping); + self->mapping = mapping; + } + else { + /* alphabet is not a string; cannot use mapping */ + PyObject* sequence = PySequence_Fast(alphabet, + "alphabet should support the sequence protocol (e.g.,\n" + "strings, lists, and tuples can be valid alphabets)."); + if (!sequence) return -1; + size = PySequence_Fast_GET_SIZE(sequence); + Py_DECREF(sequence); + if (self->mapping) { + PyMem_Free(self->mapping); + self->mapping = NULL; + } + Py_INCREF(alphabet); + } + Py_XDECREF(self->alphabet); + self->alphabet = alphabet; + return size; +} + +static int +Aligner_init(Aligner *self, PyObject *args, PyObject *kwds) +{ + self->mode = Global; + self->match = 1.0; + self->mismatch = 0.0; + self->epsilon = 1.e-6; + self->target_internal_open_gap_score = 0; + self->target_internal_extend_gap_score = 0; + self->query_internal_open_gap_score = 0; + self->query_internal_extend_gap_score = 0; + self->target_left_open_gap_score = 0; + self->target_left_extend_gap_score = 0; + self->target_right_open_gap_score = 0; + self->target_right_extend_gap_score = 0; + self->query_left_open_gap_score = 0; + self->query_left_extend_gap_score = 0; + self->query_right_open_gap_score = 0; + self->query_right_extend_gap_score = 0; + self->target_gap_function = NULL; + self->query_gap_function 
= NULL; + self->substitution_matrix.obj = NULL; + self->substitution_matrix.buf = NULL; + self->algorithm = Unknown; + self->alphabet = NULL; + self->mapping = NULL; + self->wildcard = -1; + return 0; +} + +static void +Aligner_dealloc(Aligner* self) +{ Py_XDECREF(self->target_gap_function); + Py_XDECREF(self->query_gap_function); + if (self->substitution_matrix.obj) PyBuffer_Release(&self->substitution_matrix); + Py_XDECREF(self->alphabet); + Py_XDECREF(self->mapping); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyObject* +Aligner_repr(Aligner* self) +{ + const char text[] = "Pairwise aligner, implementing the Needleman-Wunsch, Smith-Waterman, Gotoh, and Waterman-Smith-Beyer global and local alignment algorithms"; + return PyUnicode_FromString(text); +} + +static PyObject* +Aligner_str(Aligner* self) +{ + char text[1024]; + char* p = text; + PyObject* substitution_matrix = self->substitution_matrix.obj; + void* args[3]; + int n = 0; + PyObject* wildcard = NULL; + PyObject* s; + + p += sprintf(p, "Pairwise sequence aligner with parameters\n"); + if (substitution_matrix) { + p += sprintf(p, " substitution_matrix: <%s object at %p>\n", + Py_TYPE(substitution_matrix)->tp_name, + substitution_matrix); + } else { + if (self->wildcard == -1) { + p += sprintf(p, " wildcard: None\n"); + } + else { + wildcard = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, + &self->wildcard, 1); + if (!wildcard) return NULL; + p += sprintf(p, " wildcard: '%%U'\n"); + args[n++] = wildcard; + } + p += sprintf(p, " match_score: %f\n", self->match); + p += sprintf(p, " mismatch_score: %f\n", self->mismatch); + } + if (self->target_gap_function) { + p += sprintf(p, " target_gap_function: %%R\n"); + args[n++] = self->target_gap_function; + } + else { + p += sprintf(p, " target_internal_open_gap_score: %f\n", + self->target_internal_open_gap_score); + p += sprintf(p, " target_internal_extend_gap_score: %f\n", + self->target_internal_extend_gap_score); + p += sprintf(p, " 
target_left_open_gap_score: %f\n", + self->target_left_open_gap_score); + p += sprintf(p, " target_left_extend_gap_score: %f\n", + self->target_left_extend_gap_score); + p += sprintf(p, " target_right_open_gap_score: %f\n", + self->target_right_open_gap_score); + p += sprintf(p, " target_right_extend_gap_score: %f\n", + self->target_right_extend_gap_score); + } + if (self->query_gap_function) { + p += sprintf(p, " query_gap_function: %%R\n"); + args[n++] = self->query_gap_function; + } + else { + p += sprintf(p, " query_internal_open_gap_score: %f\n", + self->query_internal_open_gap_score); + p += sprintf(p, " query_internal_extend_gap_score: %f\n", + self->query_internal_extend_gap_score); + p += sprintf(p, " query_left_open_gap_score: %f\n", + self->query_left_open_gap_score); + p += sprintf(p, " query_left_extend_gap_score: %f\n", + self->query_left_extend_gap_score); + p += sprintf(p, " query_right_open_gap_score: %f\n", + self->query_right_open_gap_score); + p += sprintf(p, " query_right_extend_gap_score: %f\n", + self->query_right_extend_gap_score); + } + switch (self->mode) { + case Global: sprintf(p, " mode: global\n"); break; + case Local: sprintf(p, " mode: local\n"); break; + } + s = PyUnicode_FromFormat(text, args[0], args[1], args[2]); + Py_XDECREF(wildcard); + return s; +} + +static char Aligner_mode__doc__[] = "alignment mode ('global' or 'local')"; + +static PyObject* +Aligner_get_mode(Aligner* self, void* closure) +{ const char* message = NULL; + switch (self->mode) { + case Global: message = "global"; break; + case Local: message = "local"; break; + } + return PyUnicode_FromString(message); +} + +static int +Aligner_set_mode(Aligner* self, PyObject* value, void* closure) +{ + if (PyUnicode_Check(value)) { + if (PyUnicode_CompareWithASCIIString(value, "global") == 0) { + self->mode = Global; + return 0; + } + if (PyUnicode_CompareWithASCIIString(value, "local") == 0) { + self->mode = Local; + return 0; + } + } + PyErr_SetString(PyExc_ValueError, + 
"invalid mode (expected 'global' or 'local'"); + return -1; +} + +static char Aligner_match_score__doc__[] = "match score"; + +static PyObject* +Aligner_get_match_score(Aligner* self, void* closure) +{ if (self->substitution_matrix.obj) { + Py_INCREF(Py_None); + return Py_None; + } + return PyFloat_FromDouble(self->match); +} + +static int +Aligner_set_match_score(Aligner* self, PyObject* value, void* closure) +{ + const double match = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "invalid match score"); + return -1; + } + if (self->substitution_matrix.obj) { + if (set_alphabet(self, Py_None) < 0) return -1; + PyBuffer_Release(&self->substitution_matrix); + } + self->match = match; + return 0; +} + +static char Aligner_mismatch_score__doc__[] = "mismatch score"; + +static PyObject* +Aligner_get_mismatch_score(Aligner* self, void* closure) +{ if (self->substitution_matrix.obj) { + Py_INCREF(Py_None); + return Py_None; + } + return PyFloat_FromDouble(self->mismatch); +} + +static int +Aligner_set_mismatch_score(Aligner* self, PyObject* value, void* closure) +{ + const double mismatch = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "invalid mismatch score"); + return -1; + } + if (self->substitution_matrix.obj) { + if (set_alphabet(self, Py_None) < 0) return -1; + PyBuffer_Release(&self->substitution_matrix); + } + self->mismatch = mismatch; + return 0; +} + +static char Aligner_substitution_matrix__doc__[] = "substitution_matrix"; + +static PyObject* +Aligner_get_substitution_matrix(Aligner* self, void* closure) +{ PyObject* object = self->substitution_matrix.obj; + if (!object) object = Py_None; + Py_INCREF(object); + return object; +} + +static int +Aligner_set_substitution_matrix(Aligner* self, PyObject* values, void* closure) +{ + PyObject* alphabet; + Py_ssize_t size = -1; + Py_buffer view; + const int flag = PyBUF_FORMAT | PyBUF_ND; + if (values == Py_None) { + if 
(self->substitution_matrix.obj) + PyBuffer_Release(&self->substitution_matrix); + return 0; + } + if (PyObject_GetBuffer(values, &view, flag) != 0) { + PyErr_SetString(PyExc_ValueError, "expected a matrix"); + return -1; + } + if (view.ndim != 2) { + PyErr_Format(PyExc_ValueError, + "substitution matrix has incorrect rank (%d expected 2)", + view.ndim); + PyBuffer_Release(&view); + return -1; + } + if (view.len == 0) { + PyErr_SetString(PyExc_ValueError, "substitution matrix has zero size"); + PyBuffer_Release(&view); + return -1; + } + if (strcmp(view.format, "d") != 0) { + PyErr_SetString(PyExc_ValueError, + "substitution matrix should contain float values"); + PyBuffer_Release(&view); + return -1; + } + if (view.itemsize != sizeof(double)) { + PyErr_Format(PyExc_RuntimeError, + "substitution matrix has unexpected item byte size " + "(%zd, expected %zd)", view.itemsize, sizeof(double)); + PyBuffer_Release(&view); + return -1; + } + if (view.shape[0] != view.shape[1]) { + PyErr_Format(PyExc_ValueError, + "substitution matrix should be square " + "(found a %zd x %zd matrix)", + view.shape[0], view.shape[1]); + PyBuffer_Release(&view); + return -1; + } + alphabet = PyObject_GetAttrString(values, "alphabet"); + if (alphabet) { + size = set_alphabet(self, alphabet); + Py_DECREF(alphabet); + } else { + /* Set a substitution matrix without setting an alphabet; useful + * when aligning integers. 
*/ + PyErr_Clear(); + size = set_alphabet(self, Py_None); + } + if (size < 0) { + PyBuffer_Release(&view); + return -1; + } + if (self->substitution_matrix.obj) PyBuffer_Release(&self->substitution_matrix); + self->substitution_matrix = view; + return 0; +} + +static char Aligner_alphabet__doc__[] = "alphabet"; + +static PyObject* +Aligner_get_alphabet(Aligner* self, void* closure) +{ PyObject* object = self->alphabet; + if (!object) object = Py_None; + Py_INCREF(object); + return object; +} + +static int +Aligner_set_alphabet(Aligner* self, PyObject* alphabet, void* closure) +{ + if (self->substitution_matrix.obj) { + PyErr_SetString(PyExc_AttributeError, + "can't set alphabet if a substitution matrix is used"); + return -1; + } + if (set_alphabet(self, alphabet) < 0) return -1; + return 0; +} + +static char Aligner_gap_score__doc__[] = "gap score"; + +static PyObject* +Aligner_get_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + if (self->target_gap_function != self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + Py_INCREF(self->target_gap_function); + return self->target_gap_function; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->target_left_open_gap_score + || score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int 
+Aligner_set_gap_score(Aligner* self, PyObject* value, void* closure) +{ if (PyCallable_Check(value)) { + Py_XDECREF(self->target_gap_function); + Py_XDECREF(self->query_gap_function); + Py_INCREF(value); + Py_INCREF(value); + self->target_gap_function = value; + self->query_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_open_gap_score__doc__[] = "internal and end open gap score"; + +static PyObject* +Aligner_get_open_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_left_open_gap_score + || score != self->target_right_open_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int 
+Aligner_set_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + self->query_internal_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_extend_gap_score__doc__[] = "extend gap score"; + /* Getter: raises ValueError if either gap function is set or if the six extend gap scores (target and query; internal, left, right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_extend_gap_score(Aligner* self, void* closure) +{ + if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the six extend gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_extend_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; 
+ self->query_internal_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_gap_score__doc__[] = "internal gap score"; + /* Getter: raises ValueError if either gap function is set or if the four internal gap scores (target and query; open and extend) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_internal_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->query_internal_open_gap_score + || score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the four internal gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_internal_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_open_gap_score__doc__[] = "internal open gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query internal open gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->query_internal_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); 
+ return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query internal open gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_internal_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_open_gap_score = score; + self->query_internal_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_internal_extend_gap_score__doc__[] = "internal extend gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query internal extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query internal extend gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_internal_extend_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_internal_extend_gap_score = score; + self->query_internal_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_gap_score__doc__[] = "end gap score"; + /* Getter: raises ValueError if either gap function is set or if the eight end gap scores (target and query; left and right; open and extend) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_end_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a 
gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the eight end gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_end_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_open_gap_score__doc__[] = "end open gap score"; + /* Getter: raises ValueError if either gap function is set or if the four end open gap scores (target and query; left and right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_end_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_right_open_gap_score + || score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return 
PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the four end open gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_end_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_end_extend_gap_score__doc__[] = "end extend gap score"; + /* Getter: raises ValueError if either gap function is set or if the four end extend gap scores (target and query; left and right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != self->target_right_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the four end extend gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_gap_score__doc__[] = 
"left gap score"; + /* Getter: raises ValueError if either gap function is set or if the four left gap scores (target and query; open and extend) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_left_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->query_left_open_gap_score + || score != self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the four left gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_gap_score__doc__[] = "right gap score"; + /* Getter: raises ValueError if either gap function is set or if the four right gap scores (target and query; open and extend) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_right_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->target_right_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the four right gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if 
(PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_open_gap_score__doc__[] = "left open gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query left open gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_left_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->query_left_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query left open gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_open_gap_score = score; + self->query_left_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_left_extend_gap_score__doc__[] = "left extend gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query left extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_left_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != 
self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query left extend gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_left_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_open_gap_score__doc__[] = "right open gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query right open gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_right_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query right open gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_open_gap_score = score; + self->query_right_open_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_right_extend_gap_score__doc__[] = "right extend gap score"; + /* Getter: raises ValueError if either gap function is set or if the target and query right extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_right_extend_gap_score(Aligner* self, void* closure) +{ if 
(self->target_gap_function || self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_extend_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), releases both gap functions, stores the score in the target and query right extend gap score fields and sets self->algorithm to Unknown. */ +static int +Aligner_set_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->target_right_extend_gap_score = score; + self->query_right_extend_gap_score = score; + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_open_gap_score__doc__[] = "target open gap score"; + /* Getter: only the target gap function is checked here. Raises ValueError if it is set or if the three target open gap scores (internal, left, right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_left_open_gap_score + || score != self->target_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores the score in the three target open gap score fields, releases the target gap function (the query function is left alone) and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + 
+static char Aligner_target_extend_gap_score__doc__[] = "target extend gap score"; + /* Getter: raises ValueError if the target gap function is set or if the three target extend gap scores (internal, left, right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_extend_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores the score in the three target extend gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_extend_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_gap_score__doc__[] = "target gap score"; + /* Getter: returns the target gap function (new reference) when one is set; otherwise returns the common value of the six target gap scores as a float, or raises ValueError if they differ. */ +static PyObject* +Aligner_get_target_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + Py_INCREF(self->target_gap_function); + return self->target_gap_function; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score + || score != self->target_left_open_gap_score + || score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: a callable value replaces the target gap function; any other value is converted with PyFloat_AsDouble, stored in the six target gap score fields, and the target gap function is released. A failed conversion is reported as ValueError (replacing the original error) with return value -1. Sets self->algorithm to Unknown on success. */ +static int +Aligner_set_target_gap_score(Aligner* self, PyObject* value, void* closure) +{ + if (PyCallable_Check(value)) { + Py_XDECREF(self->target_gap_function); + Py_INCREF(value); + 
self->target_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "gap score should be numerical or callable"); + return -1; + } + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_open_gap_score__doc__[] = "query gap open score"; + /* Getter: only the query gap function is checked here. Raises ValueError if it is set or if the three query open gap scores (internal, left, right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_query_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores the score in the three query open gap score fields, releases the query gap function (the target function is left alone) and sets self->algorithm to Unknown. */ +static int +Aligner_set_query_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_extend_gap_score__doc__[] = "query gap extend score"; + /* Getter: raises ValueError if the query gap function is set or if the three query extend gap scores (internal, left, right) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_query_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + 
} + else { + const double score = self->query_internal_extend_gap_score; + if (score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores the score in the three query extend gap score fields, releases the query gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_query_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_extend_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_gap_score__doc__[] = "query gap score"; + /* Getter: returns the query gap function (new reference) when one is set; otherwise returns the common value of the six query gap scores as a float, or raises ValueError if they differ. */ +static PyObject* +Aligner_get_query_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + Py_INCREF(self->query_gap_function); + return self->query_gap_function; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_left_open_gap_score + || score != self->query_right_open_gap_score + || score != self->query_internal_extend_gap_score + || score != self->query_left_extend_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: a callable value replaces the query gap function; any other value is converted with PyFloat_AsDouble, stored in the six query gap score fields, and the query gap function is released. A failed conversion is reported as ValueError (replacing the original error) with return value -1. Sets self->algorithm to Unknown on success. */ +static int +Aligner_set_query_gap_score(Aligner* self, PyObject* value, void* closure) +{ if (PyCallable_Check(value)) { + Py_XDECREF(self->query_gap_function); + Py_INCREF(value); + self->query_gap_function = value; + } + else { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "gap score should be numerical or callable"); + return -1; + } + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + 
self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_open_gap_score__doc__[] = "target internal open gap score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_internal_open_gap_score as a float. */ +static PyObject* +Aligner_get_target_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_internal_open_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_internal_open_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_internal_open_gap_score(Aligner* self, + PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_extend_gap_score__doc__[] = "target internal extend gap score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_internal_extend_gap_score as a float. */ +static PyObject* +Aligner_get_target_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_internal_extend_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_internal_extend_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_internal_extend_gap_score(Aligner* self, + PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_internal_gap_score__doc__[] = "target internal gap 
score"; + /* Getter: raises ValueError if the target gap function is set or if the target internal open and extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_internal_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_internal_open_gap_score; + if (score != self->target_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in the target internal open and extend gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_internal_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_internal_open_gap_score = score; + self->target_internal_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_gap_score__doc__[] = "target end gap score"; + /* Getter: raises ValueError if the target gap function is set or if the four target end gap scores (left and right; open and extend) differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_end_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score + || score != self->target_right_open_gap_score + || score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in the four target end gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_end_gap_score(Aligner* self, PyObject* value, void* closure) { + const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + 
self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_open_gap_score__doc__[] = "target end open gap score"; + /* Getter: raises ValueError if the target gap function is set or if the target left and right open gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_end_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in the target left and right open gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_end_open_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_end_extend_gap_score__doc__[] = "target end extend gap score"; + /* Getter: raises ValueError if the target gap function is set or if the target left and right extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_extend_gap_score; + if (score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in the target left and right extend gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_extend_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = 
NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_open_gap_score__doc__[] = "target left open score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_left_open_gap_score as a float. */ +static PyObject* +Aligner_get_target_left_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_left_open_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_left_open_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_extend_gap_score__doc__[] = "target left extend score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_left_extend_gap_score as a float. */ +static PyObject* +Aligner_get_target_left_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_left_extend_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_left_extend_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_left_gap_score__doc__[] = "target left score"; + /* Getter: raises ValueError if the target gap function is set or if the target left open and extend gap scores differ; otherwise returns their common value as a float. */ +static PyObject* +Aligner_get_target_left_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_left_open_gap_score; + if (score != self->target_left_extend_gap_score) { + 
PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in the target left open and extend gap score fields, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_left_open_gap_score = score; + self->target_left_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_gap_score_open__doc__[] = "target right open score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_right_open_gap_score as a float. */ +static PyObject* +Aligner_get_target_right_open_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_right_open_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_right_open_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_open_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_extend_gap_score__doc__[] = "target right extend score"; + /* Getter: raises ValueError if the target gap function is set; otherwise returns target_right_extend_gap_score as a float. */ +static PyObject* +Aligner_get_target_right_extend_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->target_right_extend_gap_score); +} + /* Setter: converts value with PyFloat_AsDouble (returns -1 on error), stores it in target_right_extend_gap_score, releases the target gap function and sets self->algorithm to Unknown. */ +static int +Aligner_set_target_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); 
+ self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_target_right_gap_score__doc__[] = "target right score"; + +static PyObject* +Aligner_get_target_right_gap_score(Aligner* self, void* closure) +{ if (self->target_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->target_right_open_gap_score; + if (score != self->target_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_target_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->target_right_open_gap_score = score; + self->target_right_extend_gap_score = score; + if (self->target_gap_function) { + Py_DECREF(self->target_gap_function); + self->target_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_gap_score__doc__[] = "query end score"; + +static PyObject* +Aligner_get_query_end_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_left_extend_gap_score + || score != self->query_right_open_gap_score + || score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + self->query_right_open_gap_score = score; + 
self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_open_gap_score__doc__[] = "query end open score"; + +static PyObject* +Aligner_get_query_end_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_right_open_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_end_extend_gap_score__doc__[] = "query end extend score"; + +static PyObject* +Aligner_get_query_end_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_extend_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_end_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_extend_gap_score = score; + self->query_right_extend_gap_score = score; + if 
(self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_open_gap_score__doc__[] = "query internal open gap score"; + +static PyObject* +Aligner_get_query_internal_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_internal_open_gap_score); +} + +static int +Aligner_set_query_internal_open_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_extend_gap_score__doc__[] = "query internal extend gap score"; + +static PyObject* +Aligner_get_query_internal_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_internal_extend_gap_score); +} + +static int +Aligner_set_query_internal_extend_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_internal_gap_score__doc__[] = "query internal gap score"; + +static PyObject* +Aligner_get_query_internal_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return 
NULL; + } + else { + const double score = self->query_internal_open_gap_score; + if (score != self->query_internal_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_internal_gap_score(Aligner* self, PyObject* value, + void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_internal_open_gap_score = score; + self->query_internal_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_open_gap_score__doc__[] = "query left open score"; + +static PyObject* +Aligner_get_query_left_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_left_open_gap_score); +} + +static int +Aligner_set_query_left_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_extend_gap_score__doc__[] = "query left extend score"; + +static PyObject* +Aligner_get_query_left_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_left_extend_gap_score); +} + +static int +Aligner_set_query_left_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + 
self->query_left_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_left_gap_score__doc__[] = "query left score"; + +static PyObject* +Aligner_get_query_left_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_left_open_gap_score; + if (score != self->query_left_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_left_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_left_open_gap_score = score; + self->query_left_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_open_gap_score__doc__[] = "query right open score"; + +static PyObject* +Aligner_get_query_right_open_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_right_open_gap_score); +} + +static int +Aligner_set_query_right_open_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_open_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_extend_gap_score__doc__[] = "query right extend score"; + +static 
PyObject* +Aligner_get_query_right_extend_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + return PyFloat_FromDouble(self->query_right_extend_gap_score); +} + +static int +Aligner_set_query_right_extend_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_query_right_gap_score__doc__[] = "query right score"; + +static PyObject* +Aligner_get_query_right_gap_score(Aligner* self, void* closure) +{ if (self->query_gap_function) { + PyErr_SetString(PyExc_ValueError, "using a gap score function"); + return NULL; + } + else { + const double score = self->query_right_open_gap_score; + if (score != self->query_right_extend_gap_score) { + PyErr_SetString(PyExc_ValueError, "gap scores are different"); + return NULL; + } + return PyFloat_FromDouble(score); + } +} + +static int +Aligner_set_query_right_gap_score(Aligner* self, PyObject* value, void* closure) +{ const double score = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->query_right_open_gap_score = score; + self->query_right_extend_gap_score = score; + if (self->query_gap_function) { + Py_DECREF(self->query_gap_function); + self->query_gap_function = NULL; + } + self->algorithm = Unknown; + return 0; +} + +static char Aligner_epsilon__doc__[] = "roundoff epsilon"; + +static PyObject* +Aligner_get_epsilon(Aligner* self, void* closure) +{ return PyFloat_FromDouble(self->epsilon); +} + +static int +Aligner_set_epsilon(Aligner* self, PyObject* value, void* closure) +{ const double epsilon = PyFloat_AsDouble(value); + if (PyErr_Occurred()) return -1; + self->epsilon = epsilon; + 
self->algorithm = Unknown;
+    return 0;
+}
+
+/* Getter for the "wildcard" attribute.
+ * Returns None when no wildcard is set (stored as -1), otherwise a
+ * one-character str built from the stored code point.
+ */
+static PyObject*
+Aligner_get_wildcard(Aligner* self, void* closure)
+{
+    if (self->wildcard == -1) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    else {
+        return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &self->wildcard, 1);
+    }
+}
+
+/* Setter for the "wildcard" attribute.
+ * Accepts None (clears the wildcard, stored as -1) or a str of length 1.
+ * Returns 0 on success; returns -1 with TypeError set if the attribute is
+ * being deleted or the value is not a str, and with ValueError set if the
+ * str is not exactly one character long.
+ */
+static int
+Aligner_set_wildcard(Aligner* self, PyObject* value, void* closure)
+{
+    if (value == NULL) {
+        /* "del aligner.wildcard" calls the setter with value == NULL;
+         * reject it here, before PyUnicode_Check dereferences the pointer.
+         */
+        PyErr_SetString(PyExc_TypeError,
+                        "cannot delete the wildcard attribute");
+        return -1;
+    }
+    if (value == Py_None) {
+        self->wildcard = -1;
+        return 0;
+    }
+    if (!PyUnicode_Check(value)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "wildcard should be a single character, or None");
+        return -1;
+    }
+    if (PyUnicode_READY(value) == -1) return -1;
+    if (PyUnicode_GET_LENGTH(value) != 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "wildcard should be a single character, or None");
+        return -1;
+    }
+    self->wildcard = PyUnicode_READ_CHAR(value, 0);
+    return 0;
+}
+
+static char Aligner_wildcard__doc__[] = "wildcard character";
+
+static Algorithm _get_algorithm(Aligner* self)
+{
+    Algorithm algorithm = self->algorithm;
+    if (algorithm == Unknown) {
+        const double target_gap_open = self->target_internal_open_gap_score;
+        const double query_gap_open = self->query_internal_open_gap_score;
+        const double target_gap_extend = self->target_internal_extend_gap_score;
+        const double query_gap_extend = self->query_internal_extend_gap_score;
+        const double target_left_open = self->target_left_open_gap_score;
+        const double target_left_extend = self->target_left_extend_gap_score;
+        const double query_left_open = self->query_left_open_gap_score;
+        const double target_right_open = self->target_right_open_gap_score;
+        const double query_right_open = self->query_right_open_gap_score;
+        const double target_right_extend = self->target_right_extend_gap_score;
+        const double query_left_extend = self->query_left_extend_gap_score;
+        const double query_right_extend = self->query_right_extend_gap_score;
+        if (self->target_gap_function || self->query_gap_function)
+            algorithm = WatermanSmithBeyer;
+        else if (target_gap_open ==
target_gap_extend
+              && query_gap_open == query_gap_extend
+              && target_left_open == target_left_extend
+              && target_right_open == target_right_extend
+              && query_left_open == query_left_extend
+              && query_right_open == query_right_extend)
+            algorithm = NeedlemanWunschSmithWaterman;
+        else
+            algorithm = Gotoh;
+        self->algorithm = algorithm;
+    }
+    return algorithm;
+}
+
+
+static char Aligner_algorithm__doc__[] = "alignment algorithm";
+
+/* Getter for the read-only "algorithm" attribute.
+ * Maps the (algorithm, mode) pair to a human-readable name and returns it
+ * as a str. Returns NULL with RuntimeError set if the pair has no name.
+ */
+static PyObject*
+Aligner_get_algorithm(Aligner* self, void* closure)
+{
+    const char* s = NULL;
+    const Mode mode = self->mode;
+    const Algorithm algorithm = _get_algorithm(self);
+    switch (algorithm) {
+        case NeedlemanWunschSmithWaterman:
+            switch (mode) {
+                case Global:
+                    s = "Needleman-Wunsch";
+                    break;
+                case Local:
+                    s = "Smith-Waterman";
+                    break;
+            }
+            break;
+        case Gotoh:
+            switch (mode) {
+                case Global:
+                    s = "Gotoh global alignment algorithm";
+                    break;
+                case Local:
+                    s = "Gotoh local alignment algorithm";
+                    break;
+            }
+            break;
+        case WatermanSmithBeyer:
+            switch (mode) {
+                case Global:
+                    s = "Waterman-Smith-Beyer global alignment algorithm";
+                    break;
+                case Local:
+                    s = "Waterman-Smith-Beyer local alignment algorithm";
+                    break;
+            }
+            break;
+        case Unknown:
+        default:
+            break;
+    }
+    if (s == NULL) {
+        /* No branch above assigned a name; PyUnicode_FromString must not
+         * be handed a NULL pointer, so report the inconsistency instead.
+         */
+        PyErr_SetString(PyExc_RuntimeError,
+                        "unknown alignment algorithm or mode");
+        return NULL;
+    }
+    return PyUnicode_FromString(s);
+}
+
+static PyGetSetDef Aligner_getset[] = {
+    {"mode",
+        (getter)Aligner_get_mode,
+        (setter)Aligner_set_mode,
+        Aligner_mode__doc__, NULL},
+    {"match_score",
+        (getter)Aligner_get_match_score,
+        (setter)Aligner_set_match_score,
+        Aligner_match_score__doc__, NULL},
+    {"mismatch_score",
+        (getter)Aligner_get_mismatch_score,
+        (setter)Aligner_set_mismatch_score,
+        Aligner_mismatch_score__doc__, NULL},
+    {"match", /* synonym for match_score */
+        (getter)Aligner_get_match_score,
+        (setter)Aligner_set_match_score,
+        Aligner_match_score__doc__, NULL},
+    {"mismatch", /* synonym for mismatch_score */
+        (getter)Aligner_get_mismatch_score,
+        (setter)Aligner_set_mismatch_score,
+        Aligner_mismatch_score__doc__, NULL},
+
{"substitution_matrix", + (getter)Aligner_get_substitution_matrix, + (setter)Aligner_set_substitution_matrix, + Aligner_substitution_matrix__doc__, NULL}, + {"alphabet", + (getter)Aligner_get_alphabet, + (setter)Aligner_set_alphabet, + Aligner_alphabet__doc__, NULL}, + {"gap_score", + (getter)Aligner_get_gap_score, + (setter)Aligner_set_gap_score, + Aligner_gap_score__doc__, NULL}, + {"open_gap_score", + (getter)Aligner_get_open_gap_score, + (setter)Aligner_set_open_gap_score, + Aligner_open_gap_score__doc__, NULL}, + {"extend_gap_score", + (getter)Aligner_get_extend_gap_score, + (setter)Aligner_set_extend_gap_score, + Aligner_extend_gap_score__doc__, NULL}, + {"internal_gap_score", + (getter)Aligner_get_internal_gap_score, + (setter)Aligner_set_internal_gap_score, + Aligner_internal_gap_score__doc__, NULL}, + {"internal_open_gap_score", + (getter)Aligner_get_internal_open_gap_score, + (setter)Aligner_set_internal_open_gap_score, + Aligner_internal_open_gap_score__doc__, NULL}, + {"internal_extend_gap_score", + (getter)Aligner_get_internal_extend_gap_score, + (setter)Aligner_set_internal_extend_gap_score, + Aligner_internal_extend_gap_score__doc__, NULL}, + {"end_gap_score", + (getter)Aligner_get_end_gap_score, + (setter)Aligner_set_end_gap_score, + Aligner_end_gap_score__doc__, NULL}, + {"end_open_gap_score", + (getter)Aligner_get_end_open_gap_score, + (setter)Aligner_set_end_open_gap_score, + Aligner_end_open_gap_score__doc__, NULL}, + {"end_extend_gap_score", + (getter)Aligner_get_end_extend_gap_score, + (setter)Aligner_set_end_extend_gap_score, + Aligner_end_extend_gap_score__doc__, NULL}, + {"left_gap_score", + (getter)Aligner_get_left_gap_score, + (setter)Aligner_set_left_gap_score, + Aligner_left_gap_score__doc__, NULL}, + {"left_open_gap_score", + (getter)Aligner_get_left_open_gap_score, + (setter)Aligner_set_left_open_gap_score, + Aligner_left_open_gap_score__doc__, NULL}, + {"left_extend_gap_score", + (getter)Aligner_get_left_extend_gap_score, + 
(setter)Aligner_set_left_extend_gap_score, + Aligner_left_extend_gap_score__doc__, NULL}, + {"right_gap_score", + (getter)Aligner_get_right_gap_score, + (setter)Aligner_set_right_gap_score, + Aligner_right_gap_score__doc__, NULL}, + {"right_open_gap_score", + (getter)Aligner_get_right_open_gap_score, + (setter)Aligner_set_right_open_gap_score, + Aligner_right_open_gap_score__doc__, NULL}, + {"right_extend_gap_score", + (getter)Aligner_get_right_extend_gap_score, + (setter)Aligner_set_right_extend_gap_score, + Aligner_right_extend_gap_score__doc__, NULL}, + {"target_open_gap_score", + (getter)Aligner_get_target_open_gap_score, + (setter)Aligner_set_target_open_gap_score, + Aligner_target_open_gap_score__doc__, NULL}, + {"target_extend_gap_score", + (getter)Aligner_get_target_extend_gap_score, + (setter)Aligner_set_target_extend_gap_score, + Aligner_target_extend_gap_score__doc__, NULL}, + {"target_gap_score", + (getter)Aligner_get_target_gap_score, + (setter)Aligner_set_target_gap_score, + Aligner_target_gap_score__doc__, NULL}, + {"query_open_gap_score", + (getter)Aligner_get_query_open_gap_score, + (setter)Aligner_set_query_open_gap_score, + Aligner_query_open_gap_score__doc__, NULL}, + {"query_extend_gap_score", + (getter)Aligner_get_query_extend_gap_score, + (setter)Aligner_set_query_extend_gap_score, + Aligner_query_extend_gap_score__doc__, NULL}, + {"query_gap_score", + (getter)Aligner_get_query_gap_score, + (setter)Aligner_set_query_gap_score, + Aligner_query_gap_score__doc__, NULL}, + {"target_end_gap_score", + (getter)Aligner_get_target_end_gap_score, + (setter)Aligner_set_target_end_gap_score, + Aligner_target_end_gap_score__doc__, NULL}, + {"target_end_open_gap_score", + (getter)Aligner_get_target_end_open_gap_score, + (setter)Aligner_set_target_end_open_gap_score, + Aligner_target_end_open_gap_score__doc__, NULL}, + {"target_end_extend_gap_score", + (getter)Aligner_get_target_end_extend_gap_score, + (setter)Aligner_set_target_end_extend_gap_score, + 
Aligner_target_end_extend_gap_score__doc__, NULL}, + {"target_internal_open_gap_score", + (getter)Aligner_get_target_internal_open_gap_score, + (setter)Aligner_set_target_internal_open_gap_score, + Aligner_target_internal_open_gap_score__doc__, NULL}, + {"target_internal_extend_gap_score", + (getter)Aligner_get_target_internal_extend_gap_score, + (setter)Aligner_set_target_internal_extend_gap_score, + Aligner_target_internal_extend_gap_score__doc__, NULL}, + {"target_internal_gap_score", + (getter)Aligner_get_target_internal_gap_score, + (setter)Aligner_set_target_internal_gap_score, + Aligner_target_internal_gap_score__doc__, NULL}, + {"target_left_open_gap_score", + (getter)Aligner_get_target_left_open_gap_score, + (setter)Aligner_set_target_left_open_gap_score, + Aligner_target_left_open_gap_score__doc__, NULL}, + {"target_left_extend_gap_score", + (getter)Aligner_get_target_left_extend_gap_score, + (setter)Aligner_set_target_left_extend_gap_score, + Aligner_target_left_extend_gap_score__doc__, NULL}, + {"target_left_gap_score", + (getter)Aligner_get_target_left_gap_score, + (setter)Aligner_set_target_left_gap_score, + Aligner_target_left_gap_score__doc__, NULL}, + {"target_right_open_gap_score", + (getter)Aligner_get_target_right_open_gap_score, + (setter)Aligner_set_target_right_open_gap_score, + Aligner_target_right_gap_score_open__doc__, NULL}, + {"target_right_extend_gap_score", + (getter)Aligner_get_target_right_extend_gap_score, + (setter)Aligner_set_target_right_extend_gap_score, + Aligner_target_right_extend_gap_score__doc__, NULL}, + {"target_right_gap_score", + (getter)Aligner_get_target_right_gap_score, + (setter)Aligner_set_target_right_gap_score, + Aligner_target_right_gap_score__doc__, NULL}, + {"query_end_gap_score", + (getter)Aligner_get_query_end_gap_score, + (setter)Aligner_set_query_end_gap_score, + Aligner_query_end_gap_score__doc__, NULL}, + {"query_end_open_gap_score", + (getter)Aligner_get_query_end_open_gap_score, + 
(setter)Aligner_set_query_end_open_gap_score, + Aligner_query_end_open_gap_score__doc__, NULL}, + {"query_end_extend_gap_score", + (getter)Aligner_get_query_end_extend_gap_score, + (setter)Aligner_set_query_end_extend_gap_score, + Aligner_query_end_extend_gap_score__doc__, NULL}, + {"query_internal_open_gap_score", + (getter)Aligner_get_query_internal_open_gap_score, + (setter)Aligner_set_query_internal_open_gap_score, + Aligner_query_internal_open_gap_score__doc__, NULL}, + {"query_internal_extend_gap_score", + (getter)Aligner_get_query_internal_extend_gap_score, + (setter)Aligner_set_query_internal_extend_gap_score, + Aligner_query_internal_extend_gap_score__doc__, NULL}, + {"query_internal_gap_score", + (getter)Aligner_get_query_internal_gap_score, + (setter)Aligner_set_query_internal_gap_score, + Aligner_query_internal_gap_score__doc__, NULL}, + {"query_left_open_gap_score", + (getter)Aligner_get_query_left_open_gap_score, + (setter)Aligner_set_query_left_open_gap_score, + Aligner_query_left_open_gap_score__doc__, NULL}, + {"query_left_extend_gap_score", + (getter)Aligner_get_query_left_extend_gap_score, + (setter)Aligner_set_query_left_extend_gap_score, + Aligner_query_left_extend_gap_score__doc__, NULL}, + {"query_left_gap_score", + (getter)Aligner_get_query_left_gap_score, + (setter)Aligner_set_query_left_gap_score, + Aligner_query_left_gap_score__doc__, NULL}, + {"query_right_open_gap_score", + (getter)Aligner_get_query_right_open_gap_score, + (setter)Aligner_set_query_right_open_gap_score, + Aligner_query_right_open_gap_score__doc__, NULL}, + {"query_right_extend_gap_score", + (getter)Aligner_get_query_right_extend_gap_score, + (setter)Aligner_set_query_right_extend_gap_score, + Aligner_query_right_extend_gap_score__doc__, NULL}, + {"query_right_gap_score", + (getter)Aligner_get_query_right_gap_score, + (setter)Aligner_set_query_right_gap_score, + Aligner_query_right_gap_score__doc__, NULL}, + {"epsilon", + (getter)Aligner_get_epsilon, + 
(setter)Aligner_set_epsilon, + Aligner_epsilon__doc__, NULL}, + {"wildcard", + (getter)Aligner_get_wildcard, + (setter)Aligner_set_wildcard, + Aligner_wildcard__doc__, NULL}, + {"algorithm", + (getter)Aligner_get_algorithm, + (setter)NULL, + Aligner_algorithm__doc__, NULL}, + {NULL} /* Sentinel */ +}; + +#define SELECT_SCORE_GLOBAL(score1, score2, score3) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; + +#define SELECT_SCORE_WATERMAN_SMITH_BEYER(score1, score2) \ + temp = score1 + gapscore; \ + if (temp > score) score = temp; \ + temp = score2 + gapscore; \ + if (temp > score) score = temp; + +#define SELECT_SCORE_GOTOH_LOCAL_ALIGN(score1, score2, score3, score4) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; \ + score += score4; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_SCORE_LOCAL3(score1, score2, score3) \ + score = score1; \ + temp = score2; \ + if (temp > score) score = temp; \ + temp = score3; \ + if (temp > score) score = temp; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_SCORE_LOCAL1(score1) \ + score = score1; \ + if (score < 0) score = 0; \ + else if (score > maximum) maximum = score; + +#define SELECT_TRACE_NEEDLEMAN_WUNSCH(hgap, vgap, align_score) \ + score = temp + (align_score); \ + trace = DIAGONAL; \ + temp = row[j-1] + hgap; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = HORIZONTAL; \ + } \ + else if (temp > score - epsilon) trace |= HORIZONTAL; \ + temp = row[j] + vgap; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = VERTICAL; \ + } \ + else if (temp > score - epsilon) trace |= VERTICAL; \ + temp = row[j]; \ + row[j] = score; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_SMITH_WATERMAN_HVD(align_score) \ + trace = DIAGONAL; \ + score = temp + 
(align_score); \ + temp = row[j-1] + gap_extend_A; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = HORIZONTAL; \ + } \ + else if (temp > score - epsilon) trace |= HORIZONTAL; \ + temp = row[j] + gap_extend_B; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = VERTICAL; \ + } \ + else if (temp > score - epsilon) trace |= VERTICAL; \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (trace & DIAGONAL && score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; \ + if (score > maximum) maximum = score; \ + temp = row[j]; \ + row[j] = score; + +#define SELECT_TRACE_SMITH_WATERMAN_D(align_score) \ + score = temp + (align_score); \ + trace = DIAGONAL; \ + if (score < epsilon) { \ + score = 0; \ + } \ + else if (trace & DIAGONAL && score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; \ + if (score > maximum) maximum = score; \ + temp = row[j]; \ + row[j] = score + +#define SELECT_TRACE_GOTOH_GLOBAL_GAP(matrix, score1, score2, score3) \ + trace = M_MATRIX; \ + score = score1; \ + temp = score2; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = score3; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + gaps[i][j].matrix = trace; + +#define SELECT_TRACE_GOTOH_GLOBAL_ALIGN \ + trace = M_MATRIX; \ + score = M_temp; \ + temp = Ix_temp; \ + if (temp > 
score + epsilon) { \ + score = Ix_temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = Iy_temp; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + trace = M_MATRIX; \ + score = M_temp; \ + if (Ix_temp > score + epsilon) { \ + score = Ix_temp; \ + trace = Ix_MATRIX; \ + } \ + else if (Ix_temp > score - epsilon) trace |= Ix_MATRIX; \ + if (Iy_temp > score + epsilon) { \ + score = Iy_temp; \ + trace = Iy_MATRIX; \ + } \ + else if (Iy_temp > score - epsilon) trace |= Iy_MATRIX; \ + score += (align_score); \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + maximum = score; \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M[i][j].trace = trace; + +#define SELECT_TRACE_GOTOH_LOCAL_GAP(matrix, score1, score2, score3) \ + trace = M_MATRIX; \ + score = score1; \ + temp = score2; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = score3; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + trace = 0; \ + } \ + gaps[i][j].matrix = trace; + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_GLOBAL_ALIGN(score4) \ + trace = M_MATRIX; \ + score = M_row[i-1][j-1]; \ + temp = Ix_row[i-1][j-1]; \ + if (temp > score + epsilon) { \ + score = temp; \ + trace = Ix_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Ix_MATRIX; \ + temp = Iy_row[i-1][j-1]; \ + if (temp > score 
+ epsilon) { \ + score = temp; \ + trace = Iy_MATRIX; \ + } \ + else if (temp > score - epsilon) trace |= Iy_MATRIX; \ + M_row[i][j] = score + score4; \ + M[i][j].trace = trace; + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(score1, score2) \ + temp = score1 + gapscore; \ + if (temp > score - epsilon) { \ + if (temp > score + epsilon) { \ + score = temp; \ + nm = 0; \ + ng = 0; \ + } \ + gapM[nm] = gap; \ + nm++; \ + } \ + temp = score2 + gapscore; \ + if (temp > score - epsilon) { \ + if (temp > score + epsilon) { \ + score = temp; \ + nm = 0; \ + ng = 0; \ + } \ + gapXY[ng] = gap; \ + ng++; \ + } + +#define SELECT_TRACE_WATERMAN_SMITH_BEYER_ALIGN(score1, score2, score3, score4) \ + trace = M_MATRIX; \ + score = score1; \ + if (score2 > score + epsilon) { \ + score = score2; \ + trace = Ix_MATRIX; \ + } \ + else if (score2 > score - epsilon) trace |= Ix_MATRIX; \ + if (score3 > score + epsilon) { \ + score = score3; \ + trace = Iy_MATRIX; \ + } \ + else if (score3 > score - epsilon) trace |= Iy_MATRIX; \ + score += score4; \ + if (score < epsilon) { \ + score = 0; \ + trace = STARTPOINT; \ + } \ + else if (score > maximum - epsilon) { \ + if (score > maximum + epsilon) { \ + maximum = score; \ + for ( ; im < i; im++, jm = 0) \ + for ( ; jm <= nB; jm++) M[im][jm].trace &= ~ENDPOINT; \ + for ( ; jm < j; jm++) M[im][jm].trace &= ~ENDPOINT; \ + im = i; \ + jm = j; \ + } \ + trace |= ENDPOINT; \ + } \ + M_row[i][j] = score; \ + M[i][j].trace = trace; + +/* ----------------- alignment algorithms ----------------- */ + +#define NEEDLEMANWUNSCH_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double score; \ + double temp; \ + double* row; \ + double left_gap_extend_A; \ + double right_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_extend_B; \ + switch (strand) { \ + case '+': \ + 
left_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Needleman-Wunsch algorithm */ \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) return PyErr_NoMemory(); \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + row[0] = 0.0; \ + for (j = 1; j <= nB; j++) row[j] = j * left_gap_extend_A; \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[nB] + right_gap_extend_B, \ + row[nB-1] + gap_extend_A); \ + temp = row[nB]; \ + row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + temp = row[0]; \ + row[0] = nA * right_gap_extend_B; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + right_gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(temp + (align_score), \ + row[nB] + right_gap_extend_B, \ + row[nB-1] + right_gap_extend_A); \ + PyMem_Free(row); \ + return PyFloat_FromDouble(score); + + +#define SMITHWATERMAN_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ 
+ const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double score; \ + double* row; \ + double temp; \ + double maximum = 0; \ +\ + /* Smith-Waterman algorithm */ \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) return PyErr_NoMemory(); \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + for (j = 0; j <= nB; j++) \ + row[j] = 0; \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + temp = 0; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_LOCAL3(temp + (align_score), \ + row[j] + gap_extend_B, \ + row[j-1] + gap_extend_A); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + temp = row[nB]; \ + row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + temp = 0; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + temp = row[j]; \ + row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_LOCAL1(temp + (align_score)); \ + PyMem_Free(row); \ + return PyFloat_FromDouble(maximum); + + +#define NEEDLEMANWUNSCH_ALIGN(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M; \ + double score; \ + int trace; \ + double temp; \ + double* row = NULL; \ + PathGenerator* paths; \ + double left_gap_extend_A; \ + double right_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_extend_B; \ + switch (strand) { \ + case '+': \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case 
'-': \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Needleman-Wunsch algorithm */ \ + paths = PathGenerator_create_NWSW(nA, nB, Global, strand); \ + if (!paths) return NULL; \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) { \ + Py_DECREF(paths); \ + return PyErr_NoMemory(); \ + } \ + M = paths->M; \ + row[0] = 0; \ + for (j = 1; j <= nB; j++) row[j] = j * left_gap_extend_A; \ + for (i = 1; i < nA; i++) { \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(gap_extend_A, gap_extend_B, align_score); \ + } \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(gap_extend_A, right_gap_extend_B, align_score); \ + } \ + temp = row[0]; \ + row[0] = i * left_gap_extend_B; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(right_gap_extend_A, gap_extend_B, align_score); \ + } \ + kB = sB[j-1]; \ + SELECT_TRACE_NEEDLEMAN_WUNSCH(right_gap_extend_A, right_gap_extend_B, align_score); \ + PyMem_Free(row); \ + M[nA][nB].path = 0; \ + return Py_BuildValue("fN", score, paths); + + +#define SMITHWATERMAN_ALIGN(align_score) \ + int i; \ + int j; \ + int im = nA; \ + int jm = nB; \ + int kA; \ + int kB; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M = NULL; \ + double maximum = 0; \ + double score = 0; \ + double* row = NULL; \ + double temp; \ + int trace; \ + PathGenerator* paths = NULL; \ +\ + /* Smith-Waterman algorithm */ \ + paths = 
PathGenerator_create_NWSW(nA, nB, Local, strand); \ + if (!paths) return NULL; \ + row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!row) { \ + Py_DECREF(paths); \ + return PyErr_NoMemory(); \ + } \ + M = paths->M; \ + for (j = 0; j <= nB; j++) row[j] = 0; \ + for (i = 1; i < nA; i++) { \ + temp = 0; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_SMITH_WATERMAN_HVD(align_score); \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + } \ + temp = 0; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_SMITH_WATERMAN_D(align_score); \ + PyMem_Free(row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. \ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = 1; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = 1; \ + for (j = 1; j <= nB; j++) { \ + trace = M[i][j].trace; \ + /* Remove traces to unreachable points. */ \ + if (!M[i-1][j-1].path) trace &= ~DIAGONAL; \ + if (!M[i][j-1].path) trace &= ~HORIZONTAL; \ + if (!M[i-1][j].path) trace &= ~VERTICAL; \ + if (trace & (STARTPOINT | HORIZONTAL | VERTICAL | DIAGONAL)) { \ + /* The point is reachable. */ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path = 1; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. 
*/ \ + M[i][j].path = 0; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + } \ + } \ + if (maximum == 0) M[0][0].path = NONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); + + +#define GOTOH_GLOBAL_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double left_gap_open_A; \ + double left_gap_open_B; \ + double left_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_open_A; \ + double right_gap_open_B; \ + double right_gap_extend_A; \ + double right_gap_extend_B; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + switch (strand) { \ + case '+': \ + left_gap_open_A = self->target_left_open_gap_score; \ + left_gap_open_B = self->query_left_open_gap_score; \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_open_A = self->target_right_open_gap_score; \ + right_gap_open_B = self->query_right_open_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_open_A = self->target_right_open_gap_score; \ + left_gap_open_B = self->query_right_open_gap_score; \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_open_A = self->target_left_open_gap_score; \ + right_gap_open_B = self->query_left_open_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + 
default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Gotoh algorithm with three states */ \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ +\ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = left_gap_open_A + left_gap_extend_A * (j-1); \ + } \ +\ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_GLOBAL(M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] + right_gap_open_B); \ + Ix_row[nB] = score; \ + SELECT_SCORE_GLOBAL(M_row[nB-1] + gap_open_A, \ + Iy_row[nB-1] + gap_extend_A, \ + Ix_row[nB-1] + gap_open_A); \ + Iy_row[nB] = score; \ + } \ +\ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] 
= left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_GLOBAL(M_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A, \ + Ix_row[j-1] + right_gap_open_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ +\ + kB = sB[nB-1]; \ + SELECT_SCORE_GLOBAL(M_temp, \ + Ix_temp, \ + Iy_temp); \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_SCORE_GLOBAL(M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] + right_gap_open_B); \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = score; \ + SELECT_SCORE_GLOBAL(M_row[nB-1] + right_gap_open_A, \ + Ix_row[nB-1] + right_gap_open_A, \ + Iy_row[nB-1] + right_gap_extend_A); \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = score; \ +\ + SELECT_SCORE_GLOBAL(M_row[nB], Ix_row[nB], Iy_row[nB]); \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return PyFloat_FromDouble(score); \ +\ +exit: \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define GOTOH_LOCAL_SCORE(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + double maximum = 0.0; \ +\ + /* 
Gotoh algorithm with three states */ \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters. \ + */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = 0; \ + } \ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = 0; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + SELECT_SCORE_LOCAL3(M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_SCORE_LOCAL3(M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + Ix_row[nB] = 0; \ + Iy_row[nB] = 0; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[nB]; \ + M_row[nB] = score; \ + } \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = 0; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + (align_score)); \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[j]; \ + Iy_temp = Iy_row[j]; \ + Ix_row[j] = 0; \ + Iy_row[j] = 0; \ + } \ + kB = sB[nB-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M_temp, \ + Ix_temp, \ + Iy_temp, \ + 
(align_score)); \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return PyFloat_FromDouble(maximum); \ +exit: \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +/* GOTOH_GLOBAL_ALIGN(align_score): function-body macro for global alignment + * using the three-state (M, Ix, Iy) Gotoh recurrence, recording traces in a + * PathGenerator. self, sA, sB, nA, nB and strand must be in scope where the + * macro is expanded; align_score is evaluated after kA and kB have been set. + * Returns Py_BuildValue("fN", score, paths) on success, NULL for an invalid + * strand or when the PathGenerator cannot be created, and PyErr_NoMemory() + * when a row buffer cannot be allocated. M_row, Ix_row and Iy_row are scratch + * buffers only and are released on every return path after allocation. + */ +#define GOTOH_GLOBAL_ALIGN(align_score) \ + int i; \ + int j; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + double left_gap_open_A; \ + double left_gap_open_B; \ + double left_gap_extend_A; \ + double left_gap_extend_B; \ + double right_gap_open_A; \ + double right_gap_open_B; \ + double right_gap_extend_A; \ + double right_gap_extend_B; \ + const double epsilon = self->epsilon; \ + TraceGapsGotoh** gaps = NULL; \ + Trace** M = NULL; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + int trace; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + PathGenerator* paths; \ + switch (strand) { \ + case '+': \ + left_gap_open_A = self->target_left_open_gap_score; \ + left_gap_open_B = self->query_left_open_gap_score; \ + left_gap_extend_A = self->target_left_extend_gap_score; \ + left_gap_extend_B = self->query_left_extend_gap_score; \ + right_gap_open_A = self->target_right_open_gap_score; \ + right_gap_open_B = self->query_right_open_gap_score; \ + right_gap_extend_A = self->target_right_extend_gap_score; \ + right_gap_extend_B = self->query_right_extend_gap_score; \ + break; \ + case '-': \ + left_gap_open_A = self->target_right_open_gap_score; \ + left_gap_open_B = self->query_right_open_gap_score; \ + left_gap_extend_A = self->target_right_extend_gap_score; \ + left_gap_extend_B = self->query_right_extend_gap_score; \ + right_gap_open_A =
self->target_left_open_gap_score; \ + right_gap_open_B = self->query_left_open_gap_score; \ + right_gap_extend_A = self->target_left_extend_gap_score; \ + right_gap_extend_B = self->query_left_extend_gap_score; \ + break; \ + default: \ + PyErr_SetString(PyExc_RuntimeError, "strand was neither '+' nor '-'"); \ + return NULL; \ + } \ +\ + /* Gotoh algorithm with three states */ \ + paths = PathGenerator_create_Gotoh(nA, nB, Global, strand); \ + if (!paths) return NULL; \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + M = paths->M; \ + gaps = paths->gaps.gotoh; \ + \ + /* Gotoh algorithm with three states */ \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = -DBL_MAX; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = left_gap_open_A + left_gap_extend_A * (j-1); \ + } \ + for (i = 1; i < nA; i++) { \ + kA = sA[i-1]; \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (i-1); \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[nB]; \ + M_row[nB] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + right_gap_open_B, \ + Ix_row[nB] + right_gap_extend_B, \ + Iy_row[nB] +
right_gap_open_B); \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[nB-1] + gap_open_A, \ + Ix_row[nB-1] + gap_open_A, \ + Iy_row[nB-1] + gap_extend_A); \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = score; \ + } \ + kA = sA[nA-1]; \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = -DBL_MAX; \ + Ix_row[0] = left_gap_open_B + left_gap_extend_B * (nA-1); \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + right_gap_open_A, \ + Ix_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_GLOBAL_ALIGN; \ + M_temp = M_row[j]; \ + M_row[j] = score + (align_score); \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Ix, \ + M_temp + right_gap_open_B, \ + Ix_row[j] + right_gap_extend_B, \ + Iy_row[j] + right_gap_open_B); \ + Ix_row[nB] = score; \ + SELECT_TRACE_GOTOH_GLOBAL_GAP(Iy, \ + M_row[j-1] + right_gap_open_A, \ + Ix_row[j-1] + right_gap_open_A, \ + Iy_row[j-1] + right_gap_extend_A); \ + Iy_row[nB] = score; \ + M[nA][nB].path = 0; \ + \ + /* traceback */ \ + SELECT_SCORE_GLOBAL(M_row[nB], Ix_row[nB], Iy_row[nB]); \ + if (M_row[nB] < score - epsilon) M[nA][nB].trace = 0; \ + if (Ix_row[nB] < score - epsilon) gaps[nA][nB].Ix = 0; \ + if (Iy_row[nB] < score - epsilon) gaps[nA][nB].Iy = 0; \ + /* The row buffers are no longer read past this point; free them here \ + * so the success path does not leak them (exit: only runs on failure). */ \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return Py_BuildValue("fN", score, paths); \ +exit: \ + Py_DECREF(paths); \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + + +#define GOTOH_LOCAL_ALIGN(align_score) \ + int i; \ + int j;
\ + int im = nA; \ + int jm = nB; \ + int kA; \ + int kB; \ + const double gap_open_A = self->target_internal_open_gap_score; \ + const double gap_open_B = self->query_internal_open_gap_score; \ + const double gap_extend_A = self->target_internal_extend_gap_score; \ + const double gap_extend_B = self->query_internal_extend_gap_score; \ + const double epsilon = self->epsilon; \ + Trace** M = NULL; \ + TraceGapsGotoh** gaps = NULL; \ + double* M_row = NULL; \ + double* Ix_row = NULL; \ + double* Iy_row = NULL; \ + double score; \ + int trace; \ + double temp; \ + double M_temp; \ + double Ix_temp; \ + double Iy_temp; \ + double maximum = 0.0; \ + PathGenerator* paths; \ + \ + /* Gotoh algorithm with three states */ \ + paths = PathGenerator_create_Gotoh(nA, nB, Local, strand); \ + if (!paths) return NULL; \ + M = paths->M; \ + gaps = paths->gaps.gotoh; \ + M_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row) goto exit; \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + for (j = 1; j <= nB; j++) { \ + M_row[j] = 0; \ + Ix_row[j] = -DBL_MAX; \ + Iy_row[j] = -DBL_MAX; \ + } \ + for (i = 1; i < nA; i++) { \ + M_temp = M_row[0]; \ + Ix_temp = Ix_row[0]; \ + Iy_temp = Iy_row[0]; \ + M_row[0] = 0; \ + Ix_row[0] = -DBL_MAX; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[i-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + SELECT_TRACE_GOTOH_LOCAL_GAP(Ix, \ + M_temp + gap_open_B, \ + Ix_row[j] + gap_extend_B, \ + Iy_row[j] + gap_open_B); \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = score; \ + SELECT_TRACE_GOTOH_LOCAL_GAP(Iy, \ + M_row[j-1] + gap_open_A, \ + Ix_row[j-1] + gap_open_A, \ + Iy_row[j-1] + gap_extend_A); \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = score; \ + } \ + kB = sB[nB-1]; \ + 
SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[nB]; \ + Ix_row[nB] = 0; \ + gaps[i][nB].Ix = 0; \ + Iy_temp = Iy_row[nB]; \ + Iy_row[nB] = 0; \ + gaps[i][nB].Iy = 0; \ + } \ + M_temp = M_row[0]; \ + M_row[0] = 0; \ + M[nA][0].trace = 0; \ + Ix_temp = Ix_row[0]; \ + Ix_row[0] = -DBL_MAX; \ + gaps[nA][0].Ix = 0; \ + gaps[nA][0].Iy = 0; \ + Iy_temp = Iy_row[0]; \ + Iy_row[0] = -DBL_MAX; \ + kA = sA[nA-1]; \ + for (j = 1; j < nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + M_temp = M_row[j]; \ + M_row[j] = score; \ + Ix_temp = Ix_row[j]; \ + Ix_row[j] = 0; \ + gaps[nA][j].Ix = 0; \ + Iy_temp = Iy_row[j]; \ + Iy_row[j] = 0; \ + gaps[nA][j].Iy = 0; \ + } \ + kB = sB[nB-1]; \ + SELECT_TRACE_GOTOH_LOCAL_ALIGN(align_score) \ + gaps[nA][nB].Ix = 0; \ + gaps[nA][nB].Iy = 0; \ +\ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. \ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = M_MATRIX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = M_MATRIX; \ + for (j = 1; j <= nB; j++) { \ + /* Remove traces to unreachable points. */ \ + trace = M[i][j].trace; \ + if (!(M[i-1][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (STARTPOINT | M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. 
*/ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path |= M_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. */ \ + M[i][j].path &= ~M_MATRIX; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + trace = gaps[i][j].Ix; \ + if (!(M[i-1][j].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + M[i][j].path |= Ix_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then \ + * all traces from it can be removed. */ \ + M[i][j].path &= ~Ix_MATRIX; \ + trace = 0; \ + } \ + gaps[i][j].Ix = trace; \ + trace = gaps[i][j].Iy; \ + if (!(M[i][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + M[i][j].path |= Iy_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then \ + * all traces from it can be removed. 
*/ \ + M[i][j].path &= ~Iy_MATRIX; \ + trace = 0; \ + } \ + gaps[i][j].Iy = trace; \ + } \ + } \ +\ + /* traceback */ \ + if (maximum == 0) M[0][0].path = DONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); \ +\ +exit: \ + Py_DECREF(paths); \ + if (M_row) PyMem_Free(M_row); \ + if (Ix_row) PyMem_Free(Ix_row); \ + if (Iy_row) PyMem_Free(Iy_row); \ + return PyErr_NoMemory(); \ + +/* WATERMANSMITHBEYER_ENTER_SCORE: prologue that declares the working variables and allocates three (nA+1) x (nB+1) matrices of doubles (M, Ix, Iy), jumping to an exit: label defined outside this macro if any allocation fails. nA and nB must be in scope where it is expanded. Presumably expanded ahead of WATERMANSMITHBEYER_GLOBAL_SCORE / WATERMANSMITHBEYER_LOCAL_SCORE, which use these names without declaring them -- confirm at the call sites. */ +#define WATERMANSMITHBEYER_ENTER_SCORE \ + int i; \ + int j = 0; \ + int k; \ + int kA; \ + int kB; \ + double** M = NULL; \ + double** Ix = NULL; \ + double** Iy = NULL; \ + double score = 0.0; \ + double gapscore = 0.0; \ + double temp; \ + int ok = 1; /* receives the status of each gap-function call in the score macros; a false value makes WATERMANSMITHBEYER_EXIT_SCORE return NULL */ \ + PyObject* result = NULL; \ +\ + /* Waterman-Smith-Beyer algorithm */ \ + M = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!M) goto exit; \ + Ix = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Ix) goto exit; \ + Iy = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Iy) goto exit; \ + for (i = 0; i <= nA; i++) { \ + M[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M[i]) goto exit; \ + Ix[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix[i]) goto exit; \ + Iy[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy[i]) goto exit; \ + } \ + + +#define WATERMANSMITHBEYER_GLOBAL_SCORE(align_score, query_gap_start) \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters.
\ + */ \ + M[0][0] = 0; \ + Ix[0][0] = -DBL_MAX; \ + Iy[0][0] = -DBL_MAX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0] = -DBL_MAX; \ + Iy[i][0] = -DBL_MAX; \ + ok = _call_query_gap_function(self, query_gap_start, i, &score); \ + if (!ok) goto exit; \ + Ix[i][0] = score; \ + } \ + for (j = 1; j <= nB; j++) { \ + M[0][j] = -DBL_MAX; \ + Ix[0][j] = -DBL_MAX; \ + ok = _call_target_gap_function(self, 0, j, &score); \ + if (!ok) goto exit; \ + Iy[0][j] = score; \ + } \ + for (i = 1; i <= nA; i++) { \ + kA = sA[i-1]; \ + for (j = 1; j <= nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GLOBAL(M[i-1][j-1], Ix[i-1][j-1], Iy[i-1][j-1]); \ + M[i][j] = score + (align_score); \ + score = -DBL_MAX; \ + for (k = 1; k <= i; k++) { \ + ok = _call_query_gap_function(self, query_gap_start, k, &gapscore); \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i-k][j], Iy[i-k][j]); \ + } \ + Ix[i][j] = score; \ + score = -DBL_MAX; \ + for (k = 1; k <= j; k++) { \ + ok = _call_target_gap_function(self, i, k, &gapscore); \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i][j-k], Ix[i][j-k]); \ + } \ + Iy[i][j] = score; \ + } \ + } \ + SELECT_SCORE_GLOBAL(M[nA][nB], Ix[nA][nB], Iy[nA][nB]); \ +\ + result = PyFloat_FromDouble(score); \ + + +/* WATERMANSMITHBEYER_LOCAL_SCORE(align_score, query_gap_start): fills the + * M, Ix and Iy score matrices for a local alignment, obtaining gap scores + * through _call_query_gap_function / _call_target_gap_function, and stores + * the best score seen as a Python float in result. It uses i, j, k, kA, kB, + * M, Ix, Iy, score, gapscore, ok and result as declared by + * WATERMANSMITHBEYER_ENTER_SCORE, plus self, sA, sB, nA, nB and maximum from + * the expansion site. When a gap-function call reports failure it jumps to + * the exit: label before the returned gap score is used. + */ +#define WATERMANSMITHBEYER_LOCAL_SCORE(align_score, query_gap_start) \ + /* The top row of the score matrix is a special case, \ + * as there are no previously aligned characters.
\ + */ \ + M[0][0] = 0; \ + Ix[0][0] = -DBL_MAX; \ + Iy[0][0] = -DBL_MAX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0] = -DBL_MAX; \ + Ix[i][0] = 0; \ + Iy[i][0] = -DBL_MAX; \ + } \ + for (j = 1; j <= nB; j++) { \ + M[0][j] = -DBL_MAX; \ + Ix[0][j] = -DBL_MAX; \ + Iy[0][j] = 0; \ + } \ + for (i = 1; i <= nA; i++) { \ + kA = sA[i-1]; \ + for (j = 1; j <= nB; j++) { \ + kB = sB[j-1]; \ + SELECT_SCORE_GOTOH_LOCAL_ALIGN(M[i-1][j-1], \ + Ix[i-1][j-1], \ + Iy[i-1][j-1], \ + (align_score)); \ + M[i][j] = score; \ + if (i == nA || j == nB) { \ + Ix[i][j] = 0; \ + Iy[i][j] = 0; \ + continue; \ + } \ + score = 0.0; \ + for (k = 1; k <= i; k++) { \ + ok = _call_query_gap_function(self, query_gap_start, k, &gapscore); \ + /* Check the status first so gapscore is never consumed after a \ + * failed call, matching the target-gap loop below. */ \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i-k][j], Iy[i-k][j]); \ + } \ + if (score > maximum) maximum = score; \ + Ix[i][j] = score; \ + score = 0.0; \ + for (k = 1; k <= j; k++) { \ + ok = _call_target_gap_function(self, i, k, &gapscore); \ + if (!ok) goto exit; \ + SELECT_SCORE_WATERMAN_SMITH_BEYER(M[i][j-k], Ix[i][j-k]); \ + } \ + if (score > maximum) maximum = score; \ + Iy[i][j] = score; \ + } \ + } \ + SELECT_SCORE_GLOBAL(M[nA][nB], Ix[nA][nB], Iy[nA][nB]); \ + if (score > maximum) maximum = score; \ + result = PyFloat_FromDouble(maximum); \ + + +#define WATERMANSMITHBEYER_EXIT_SCORE \ +exit: \ + if (M) { \ + /* If M is NULL, then Ix is also NULL. */ \ + if (Ix) { \ + /* If Ix is NULL, then Iy is also NULL. */ \ + if (Iy) { \ + /* If Iy is NULL, then M[i], Ix[i], and Iy[i] are \ + * also NULL.
*/ \ + for (i = 0; i <= nA; i++) { \ + if (!M[i]) break; \ + PyMem_Free(M[i]); \ + if (!Ix[i]) break; \ + PyMem_Free(Ix[i]); \ + if (!Iy[i]) break; \ + PyMem_Free(Iy[i]); \ + } \ + PyMem_Free(Iy); \ + } \ + PyMem_Free(Ix); \ + } \ + PyMem_Free(M); \ + } \ + if (!ok) return NULL; \ + if (!result) return PyErr_NoMemory(); \ + return result; \ + +/* WATERMANSMITHBEYER_ENTER_ALIGN(mode): prologue that declares the working variables, creates the PathGenerator with PathGenerator_create_WSB(nA, nB, mode, strand) and allocates three (nA+1) x (nB+1) matrices of doubles (M_row, Ix_row, Iy_row). Returns NULL if the PathGenerator cannot be created; any later allocation failure jumps to an exit: label that is not defined in this macro. self, nA, nB and strand must be in scope where it is expanded. */ +#define WATERMANSMITHBEYER_ENTER_ALIGN(mode) \ + int i; \ + int j = 0; \ + int gap; \ + int kA; \ + int kB; \ + const double epsilon = self->epsilon; \ + Trace** M; \ + TraceGapsWatermanSmithBeyer** gaps; \ + double** M_row; \ + double** Ix_row; \ + double** Iy_row; \ + int ng; \ + int nm; \ + double score; \ + double gapscore; \ + double temp; \ + int trace; \ + int* gapM; \ + int* gapXY; \ + int ok = 1; \ + PathGenerator* paths = NULL; \ + \ + /* Waterman-Smith-Beyer algorithm */ \ + paths = PathGenerator_create_WSB(nA, nB, mode, strand); \ + if (!paths) return NULL; \ + M = paths->M; \ + gaps = paths->gaps.waterman_smith_beyer; \ + M_row = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!M_row) goto exit; \ + Ix_row = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Ix_row) goto exit; \ + Iy_row = PyMem_Malloc((nA+1)*sizeof(double*)); \ + if (!Iy_row) goto exit; \ + for (i = 0; i <= nA; i++) { \ + M_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!M_row[i]) goto exit; \ + Ix_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Ix_row[i]) goto exit; \ + Iy_row[i] = PyMem_Malloc((nB+1)*sizeof(double)); \ + if (!Iy_row[i]) goto exit; \ + } \ + + +#define WATERMANSMITHBEYER_GLOBAL_ALIGN(align_score, query_gap_start) \ + M_row[0][0] = 0; \ + Ix_row[0][0] = -DBL_MAX; \ + Iy_row[0][0] = -DBL_MAX; \ + for (i = 1; i <= nA; i++) { \ + M_row[i][0] = -DBL_MAX; \ + Iy_row[i][0] = -DBL_MAX; \ + ok = _call_query_gap_function(self, query_gap_start, i, &score); \ + if (!ok) goto exit; \ + Ix_row[i][0] = score; \ + } \ + for (j = 1; j <= nB; j++) { \ + M_row[0][j] = -DBL_MAX; \ + Ix_row[0][j] = -DBL_MAX; \ + ok =
_call_target_gap_function(self, query_gap_start, j, &score); \ + if (!ok) goto exit; \ + Iy_row[0][j] = score; \ + } \ + for (i = 1; i <= nA; i++) { \ + kA = sA[i-1]; \ + for (j = 1; j <= nB; j++) { \ + kB = sB[j-1]; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GLOBAL_ALIGN((align_score)); \ + gapM = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapXY = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IyIx = gapXY; \ + nm = 0; \ + ng = 0; \ + score = -DBL_MAX; \ + for (gap = 1; gap <= i; gap++) { \ + ok = _call_query_gap_function(self, query_gap_start, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i-gap][j], \ + Iy_row[i-gap][j]); \ + } \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapM[nm] = 0; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[ng] = 0; \ + gaps[i][j].IyIx = gapXY; \ + Ix_row[i][j] = score; \ + gapM = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapXY = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + nm = 0; \ + ng = 0; \ + score = -DBL_MAX; \ + for (gap = 1; gap <= j; gap++) { \ + ok = _call_target_gap_function(self, i, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i][j-gap], \ + Ix_row[i][j-gap]); \ + } \ + Iy_row[i][j] = score; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapM[nm] = 0; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + gapXY[ng] = 0; \ + } \ + } \ + /* traceback */ \ + SELECT_SCORE_GLOBAL(M_row[nA][nB], Ix_row[nA][nB], Iy_row[nA][nB]); \ + M[nA][nB].path = 0; \ + if (M_row[nA][nB] < score - epsilon) M[nA][nB].trace = 0; \ + if (Ix_row[nA][nB] < score - epsilon) { \ + 
gapM = PyMem_Realloc(gaps[nA][nB].MIx, sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[0] = 0; \ + gaps[nA][nB].MIx = gapM; \ + gapXY = PyMem_Realloc(gaps[nA][nB].IyIx, sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[0] = 0; \ + gaps[nA][nB].IyIx = gapXY; \ + } \ + if (Iy_row[nA][nB] < score - epsilon) { \ + gapM = PyMem_Realloc(gaps[nA][nB].MIy, sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[0] = 0; \ + gaps[nA][nB].MIy = gapM; \ + gapXY = PyMem_Realloc(gaps[nA][nB].IxIy, sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[0] = 0; \ + gaps[nA][nB].IxIy = gapXY; \ + } \ + for (i = 0; i <= nA; i++) { \ + PyMem_Free(M_row[i]); \ + PyMem_Free(Ix_row[i]); \ + PyMem_Free(Iy_row[i]); \ + } \ + PyMem_Free(M_row); \ + PyMem_Free(Ix_row); \ + PyMem_Free(Iy_row); \ + return Py_BuildValue("fN", score, paths); \ + + +#define WATERMANSMITHBEYER_LOCAL_ALIGN(align_score, query_gap_start) \ + M_row[0][0] = 0; \ + Ix_row[0][0] = -DBL_MAX; \ + Iy_row[0][0] = -DBL_MAX; \ + for (i = 1; i <= nA; i++) { \ + M_row[i][0] = 0; \ + Ix_row[i][0] = -DBL_MAX; \ + Iy_row[i][0] = -DBL_MAX; \ + } \ + for (i = 1; i <= nB; i++) { \ + M_row[0][i] = 0; \ + Ix_row[0][i] = -DBL_MAX; \ + Iy_row[0][i] = -DBL_MAX; \ + } \ + for (i = 1; i <= nA; i++) { \ + kA = sA[i-1]; \ + for (j = 1; j <= nB; j++) { \ + kB = sB[j-1]; \ + nm = 0; \ + ng = 0; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_ALIGN( \ + M_row[i-1][j-1], \ + Ix_row[i-1][j-1], \ + Iy_row[i-1][j-1], \ + (align_score)); \ + M[i][j].path = 0; \ + if (i == nA || j == nB) { \ + Ix_row[i][j] = score; \ + gaps[i][j].MIx = NULL; \ + gaps[i][j].IyIx = NULL; \ + gaps[i][j].MIy = NULL; \ + gaps[i][j].IxIy = NULL; \ + Iy_row[i][j] = score; \ + continue; \ + } \ + gapM = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapXY = PyMem_Malloc((i+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IyIx = gapXY; \ + score = -DBL_MAX; \ + for (gap = 1; gap <= i; gap++) { \ + ok = _call_query_gap_function(self, 
query_gap_start, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i-gap][j], \ + Iy_row[i-gap][j]); \ + } \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + nm = 0; \ + ng = 0; \ + } \ + else if (score > maximum) maximum = score; \ + gapM[nm] = 0; \ + gapXY[ng] = 0; \ + Ix_row[i][j] = score; \ + M[i][j].path = 0; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIx = gapM; \ + gapM[nm] = 0; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IyIx = gapXY; \ + gapXY[ng] = 0; \ + gapM = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapXY = PyMem_Malloc((j+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + nm = 0; \ + ng = 0; \ + score = -DBL_MAX; \ + gapM[0] = 0; \ + for (gap = 1; gap <= j; gap++) { \ + ok = _call_target_gap_function(self, i, gap, &gapscore); \ + if (!ok) goto exit; \ + SELECT_TRACE_WATERMAN_SMITH_BEYER_GAP(M_row[i][j-gap], \ + Ix_row[i][j-gap]); \ + } \ + if (score < epsilon) { \ + score = -DBL_MAX; \ + nm = 0; \ + ng = 0; \ + } \ + else if (score > maximum) maximum = score; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gaps[i][j].MIy = gapM; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gaps[i][j].IxIy = gapXY; \ + gapM[nm] = 0; \ + gapXY[ng] = 0; \ + Iy_row[i][j] = score; \ + M[i][j].path = 0; \ + } \ + } \ + for (i = 0; i <= nA; i++) PyMem_Free(M_row[i]); \ + PyMem_Free(M_row); \ + for (i = 0; i <= nA; i++) PyMem_Free(Ix_row[i]); \ + PyMem_Free(Ix_row); \ + for (i = 0; i <= nA; i++) PyMem_Free(Iy_row[i]); \ + PyMem_Free(Iy_row); \ +\ + /* As we don't allow zero-score extensions to alignments, \ + * we need to remove all traces towards an ENDPOINT. \ + * In addition, some points then won't have any path to a STARTPOINT. 
\ + * Here, use path as a temporary variable to indicate if the point \ + * is reachable from a STARTPOINT. If it is unreachable, remove all \ + * traces from it, and don't allow it to be an ENDPOINT. It may still \ + * be a valid STARTPOINT. */ \ + for (j = 0; j <= nB; j++) M[0][j].path = M_MATRIX; \ + for (i = 1; i <= nA; i++) { \ + M[i][0].path = M_MATRIX; \ + for (j = 1; j <= nB; j++) { \ + /* Remove traces to unreachable points. */ \ + trace = M[i][j].trace; \ + if (!(M[i-1][j-1].path & M_MATRIX)) trace &= ~M_MATRIX; \ + if (!(M[i-1][j-1].path & Ix_MATRIX)) trace &= ~Ix_MATRIX; \ + if (!(M[i-1][j-1].path & Iy_MATRIX)) trace &= ~Iy_MATRIX; \ + if (trace & (STARTPOINT | M_MATRIX | Ix_MATRIX | Iy_MATRIX)) { \ + /* The point is reachable. */ \ + if (trace & ENDPOINT) M[i][j].path = 0; /* no extensions after ENDPOINT */ \ + else M[i][j].path |= M_MATRIX; \ + } \ + else { \ + /* The point is not reachable. Then it is not a STARTPOINT, \ + * all traces from it can be removed, and it cannot act as \ + * an ENDPOINT. 
*/ \ + M[i][j].path &= ~M_MATRIX; \ + trace = 0; \ + } \ + M[i][j].trace = trace; \ + if (i == nA || j == nB) continue; \ + gapM = gaps[i][j].MIx; \ + gapXY = gaps[i][j].IyIx; \ + nm = 0; \ + ng = 0; \ + for (im = 0; (gap = gapM[im]); im++) \ + if (M[i-gap][j].path & M_MATRIX) gapM[nm++] = gap; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[nm] = 0; \ + gaps[i][j].MIx = gapM; \ + for (im = 0; (gap = gapXY[im]); im++) \ + if (M[i-gap][j].path & Iy_MATRIX) gapXY[ng++] = gap; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[ng] = 0; \ + gaps[i][j].IyIx = gapXY; \ + if (nm==0 && ng==0) M[i][j].path &= ~Ix_MATRIX; /* not reachable */ \ + else M[i][j].path |= Ix_MATRIX; /* reachable */ \ + gapM = gaps[i][j].MIy; \ + gapXY = gaps[i][j].IxIy; \ + nm = 0; \ + ng = 0; \ + for (im = 0; (gap = gapM[im]); im++) \ + if (M[i][j-gap].path & M_MATRIX) gapM[nm++] = gap; \ + gapM = PyMem_Realloc(gapM, (nm+1)*sizeof(int)); \ + if (!gapM) goto exit; \ + gapM[nm] = 0; \ + gaps[i][j].MIy = gapM; \ + for (im = 0; (gap = gapXY[im]); im++) \ + if (M[i][j-gap].path & Ix_MATRIX) gapXY[ng++] = gap; \ + gapXY = PyMem_Realloc(gapXY, (ng+1)*sizeof(int)); \ + if (!gapXY) goto exit; \ + gapXY[ng] = 0; \ + gaps[i][j].IxIy = gapXY; \ + if (nm==0 && ng==0) M[i][j].path &= ~Iy_MATRIX; /* not reachable */ \ + else M[i][j].path |= Iy_MATRIX; /* reachable */ \ + } \ + } \ + /* traceback */ \ + if (maximum == 0) M[0][0].path = DONE; \ + else M[0][0].path = 0; \ + return Py_BuildValue("fN", maximum, paths); \ + + +#define WATERMANSMITHBEYER_EXIT_ALIGN \ +exit: \ + if (ok) /* otherwise, an exception was already set */ \ + PyErr_SetNone(PyExc_MemoryError); \ + Py_DECREF(paths); \ + if (M_row) { \ + /* If M is NULL, then Ix is also NULL. */ \ + if (Ix_row) { \ + /* If Ix is NULL, then Iy is also NULL. */ \ + if (Iy_row) { \ + /* If Iy is NULL, then M[i], Ix[i], and Iy[i] are also NULL. 
*/ \ + for (i = 0; i <= nA; i++) { \ + if (!M_row[i]) break; \ + PyMem_Free(M_row[i]); \ + if (!Ix_row[i]) break; \ + PyMem_Free(Ix_row[i]); \ + if (!Iy_row[i]) break; \ + PyMem_Free(Iy_row[i]); \ + } \ + PyMem_Free(Iy_row); \ + } \ + PyMem_Free(Ix_row); \ + } \ + PyMem_Free(M_row); \ + } \ + return NULL; \ + + +/* -------------- allocation & deallocation ------------- */ + +static PathGenerator* +PathGenerator_create_NWSW(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i; + unsigned char trace = 0; + Trace** M; + PathGenerator* paths; + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + paths->gaps.gotoh = NULL; + paths->gaps.waterman_smith_beyer = NULL; + paths->algorithm = NeedlemanWunschSmithWaterman; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + paths->M = M; + if (!M) goto exit; + switch (mode) { + case Global: trace = VERTICAL; break; + case Local: trace = STARTPOINT; break; + } + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + M[i][0].trace = trace; + } + if (mode == Global) { + M[0][0].trace = 0; + trace = HORIZONTAL; + } + for (i = 1; i <= nB; i++) M[0][i].trace = trace; + M[0][0].path = 0; + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +static PathGenerator* +PathGenerator_create_Gotoh(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i; + unsigned char trace; + Trace** M; + TraceGapsGotoh** gaps; + PathGenerator* paths; + + switch (mode) { + case Global: trace = 0; break; + case Local: trace = STARTPOINT; break; + default: + /* Should not happen, but the compiler has no way of knowing that, + * as the enum Mode does not restrict the value of mode, which can + * be any integer. 
Include default: here to prevent compiler + * warnings. + */ + PyErr_Format(PyExc_RuntimeError, + "mode has unexpected value %d", mode); + return NULL; + } + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + paths->gaps.gotoh = NULL; + paths->algorithm = Gotoh; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + if (!M) goto exit; + paths->M = M; + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + M[i][0].trace = trace; + } + gaps = PyMem_Malloc((nA+1)*sizeof(TraceGapsGotoh*)); + if (!gaps) goto exit; + paths->gaps.gotoh = gaps; + for (i = 0; i <= nA; i++) { + gaps[i] = PyMem_Malloc((nB+1)*sizeof(TraceGapsGotoh)); + if (!gaps[i]) goto exit; + } + + gaps[0][0].Ix = 0; + gaps[0][0].Iy = 0; + if (mode == Global) { + for (i = 1; i <= nA; i++) { + gaps[i][0].Ix = Ix_MATRIX; + gaps[i][0].Iy = 0; + } + gaps[1][0].Ix = M_MATRIX; + for (i = 1; i <= nB; i++) { + M[0][i].trace = 0; + gaps[0][i].Ix = 0; + gaps[0][i].Iy = Iy_MATRIX; + } + gaps[0][1].Iy = M_MATRIX; + } + else if (mode == Local) { + for (i = 1; i < nA; i++) { + gaps[i][0].Ix = 0; + gaps[i][0].Iy = 0; + } + for (i = 1; i <= nB; i++) { + M[0][i].trace = trace; + gaps[0][i].Ix = 0; + gaps[0][i].Iy = 0; + } + } + M[0][0].path = 0; + + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +static PathGenerator* +PathGenerator_create_WSB(Py_ssize_t nA, Py_ssize_t nB, Mode mode, unsigned char strand) +{ + int i, j; + int* trace; + Trace** M = NULL; + TraceGapsWatermanSmithBeyer** gaps = NULL; + PathGenerator* paths; + + paths = (PathGenerator*)PyType_GenericAlloc(&PathGenerator_Type, 0); + if (!paths) return NULL; + + paths->iA = 0; + paths->iB = 0; + paths->nA = nA; + paths->nB = nB; + paths->M = NULL; + 
paths->gaps.waterman_smith_beyer = NULL; + paths->algorithm = WatermanSmithBeyer; + paths->mode = mode; + paths->length = 0; + paths->strand = strand; + + M = PyMem_Malloc((nA+1)*sizeof(Trace*)); + if (!M) goto exit; + paths->M = M; + for (i = 0; i <= nA; i++) { + M[i] = PyMem_Malloc((nB+1)*sizeof(Trace)); + if (!M[i]) goto exit; + } + gaps = PyMem_Malloc((nA+1)*sizeof(TraceGapsWatermanSmithBeyer*)); + if (!gaps) goto exit; + paths->gaps.waterman_smith_beyer = gaps; + for (i = 0; i <= nA; i++) gaps[i] = NULL; + for (i = 0; i <= nA; i++) { + gaps[i] = PyMem_Malloc((nB+1)*sizeof(TraceGapsWatermanSmithBeyer)); + if (!gaps[i]) goto exit; + for (j = 0; j <= nB; j++) { + gaps[i][j].MIx = NULL; + gaps[i][j].IyIx = NULL; + gaps[i][j].MIy = NULL; + gaps[i][j].IxIy = NULL; + } + M[i][0].path = 0; + switch (mode) { + case Global: + M[i][0].trace = 0; + trace = PyMem_Malloc(2*sizeof(int)); + if (!trace) goto exit; + gaps[i][0].MIx = trace; + trace[0] = i; + trace[1] = 0; + trace = PyMem_Malloc(sizeof(int)); + if (!trace) goto exit; + gaps[i][0].IyIx = trace; + trace[0] = 0; + break; + case Local: + M[i][0].trace = STARTPOINT; + break; + } + } + for (i = 1; i <= nB; i++) { + switch (mode) { + case Global: + M[0][i].trace = 0; + trace = PyMem_Malloc(2*sizeof(int)); + if (!trace) goto exit; + gaps[0][i].MIy = trace; + trace[0] = i; + trace[1] = 0; + trace = PyMem_Malloc(sizeof(int)); + if (!trace) goto exit; + gaps[0][i].IxIy = trace; + trace[0] = 0; + break; + case Local: + M[0][i].trace = STARTPOINT; + break; + } + } + M[0][0].path = 0; + return paths; +exit: + Py_DECREF(paths); + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +/* ----------------- alignment algorithms ----------------- */ + +#define MATRIX_SCORE scores[kA*n+kB] +#define COMPARE_SCORE (kA == wildcard || kB == wildcard) ? 0 : (kA == kB) ? 
match : mismatch + + +static PyObject* +Aligner_needlemanwunsch_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + NEEDLEMANWUNSCH_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + NEEDLEMANWUNSCH_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_smithwaterman_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + SMITHWATERMAN_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_smithwaterman_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + SMITHWATERMAN_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + NEEDLEMANWUNSCH_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_needlemanwunsch_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + NEEDLEMANWUNSCH_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_smithwaterman_align_compare(Aligner* self, + const int* sA, 
Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + SMITHWATERMAN_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_smithwaterman_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + SMITHWATERMAN_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_global_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_GLOBAL_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_global_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_GLOBAL_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_local_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_LOCAL_SCORE(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_local_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_LOCAL_SCORE(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_global_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = 
self->mismatch; + const int wildcard = self->wildcard; + GOTOH_GLOBAL_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_global_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_GLOBAL_ALIGN(MATRIX_SCORE); +} + +static PyObject* +Aligner_gotoh_local_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + GOTOH_LOCAL_ALIGN(COMPARE_SCORE); +} + +static PyObject* +Aligner_gotoh_local_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + GOTOH_LOCAL_ALIGN(MATRIX_SCORE); +} + +static int +_call_query_gap_function(Aligner* aligner, int i, int j, double* score) +{ + double value; + PyObject* result; + PyObject* function = aligner->query_gap_function; + if (!function) + value = aligner->query_internal_open_gap_score + + (j-1) * aligner->query_internal_extend_gap_score; + else { + result = PyObject_CallFunction(function, "ii", i, j); + if (result == NULL) return 0; + value = PyFloat_AsDouble(result); + Py_DECREF(result); + if (value == -1.0 && PyErr_Occurred()) return 0; + } + *score = value; + return 1; +} + +static int +_call_target_gap_function(Aligner* aligner, int i, int j, double* score) +{ + double value; + PyObject* result; + PyObject* function = aligner->target_gap_function; + if (!function) + value = aligner->target_internal_open_gap_score + + (j-1) * aligner->target_internal_extend_gap_score; + else { + result = PyObject_CallFunction(function, "ii", i, j); + if (result == NULL) return 0; + value = 
PyFloat_AsDouble(result); + Py_DECREF(result); + if (value == -1.0 && PyErr_Occurred()) return 0; + } + *score = value; + return 1; +} + +static PyObject* +Aligner_watermansmithbeyer_global_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_SCORE(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_SCORE(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_global_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': + WATERMANSMITHBEYER_GLOBAL_SCORE(MATRIX_SCORE, j); + break; + case '-': + WATERMANSMITHBEYER_GLOBAL_SCORE(MATRIX_SCORE, nB-j); + break; + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_local_score_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + double maximum = 0.0; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_SCORE(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_SCORE(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_local_score_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = 
self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + double maximum = 0.0; + WATERMANSMITHBEYER_ENTER_SCORE; + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_SCORE(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_SCORE(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_SCORE; +} + +static PyObject* +Aligner_watermansmithbeyer_global_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + WATERMANSMITHBEYER_ENTER_ALIGN(Global); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(COMPARE_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_global_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + WATERMANSMITHBEYER_ENTER_ALIGN(Global); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_GLOBAL_ALIGN(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_local_align_compare(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const double match = self->match; + const double mismatch = self->mismatch; + const int wildcard = self->wildcard; + int im = nA; + int jm = nB; + double maximum = 0; + WATERMANSMITHBEYER_ENTER_ALIGN(Local); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_ALIGN(COMPARE_SCORE, j); + break; + } + case '-': { + 
WATERMANSMITHBEYER_LOCAL_ALIGN(COMPARE_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static PyObject* +Aligner_watermansmithbeyer_local_align_matrix(Aligner* self, + const int* sA, Py_ssize_t nA, + const int* sB, Py_ssize_t nB, + unsigned char strand) +{ + const Py_ssize_t n = self->substitution_matrix.shape[0]; + const double* scores = self->substitution_matrix.buf; + int im = nA; + int jm = nB; + double maximum = 0; + WATERMANSMITHBEYER_ENTER_ALIGN(Local); + switch (strand) { + case '+': { + WATERMANSMITHBEYER_LOCAL_ALIGN(MATRIX_SCORE, j); + break; + } + case '-': { + WATERMANSMITHBEYER_LOCAL_ALIGN(MATRIX_SCORE, nB-j); + break; + } + } + WATERMANSMITHBEYER_EXIT_ALIGN; +} + +static int* +convert_1bytes_to_ints(const int mapping[], Py_ssize_t n, const unsigned char s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int* +convert_2bytes_to_ints(const int mapping[], Py_ssize_t n, const Py_UCS2 s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence 
contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int* +convert_4bytes_to_ints(const int mapping[], Py_ssize_t n, const Py_UCS4 s[]) +{ + unsigned char c; + Py_ssize_t i; + int index; + int* indices; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return NULL; + } + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + return NULL; + } + if (!mapping) for (i = 0; i < n; i++) indices[i] = s[i]; + else { + for (i = 0; i < n; i++) { + c = s[i]; + index = mapping[(int)c]; + if (index == MISSING_LETTER) { + PyErr_SetString(PyExc_ValueError, + "sequence contains letters not in the alphabet"); + PyMem_Free(indices); + return NULL; + } + indices[i] = index; + } + } + return indices; +} + +static int +convert_objects_to_ints(Py_buffer* view, PyObject* alphabet, PyObject* sequence) +{ + Py_ssize_t i, j; + Py_ssize_t n; + Py_ssize_t m; + int* indices = NULL; + PyObject *obj1, *obj2; + int equal; + + view->buf = NULL; + sequence = PySequence_Fast(sequence, + "argument should support the sequence protocol"); + if (!sequence) return 0; + if (!alphabet) { + PyErr_SetString(PyExc_ValueError, + "alphabet is None; cannot interpret sequence"); + goto exit; + } + alphabet = PySequence_Fast(alphabet, NULL); /* should never fail */ + n = PySequence_Size(sequence); + m = PySequence_Size(alphabet); + indices = PyMem_Malloc(n*sizeof(int)); + if (!indices) { + PyErr_NoMemory(); + goto exit; + } + for (i = 0; i < n; i++) { + obj1 = PySequence_Fast_GET_ITEM(sequence, i); + for (j = 0; j < m; j++) { + obj2 = PySequence_Fast_GET_ITEM(alphabet, j); + equal = PyObject_RichCompareBool(obj1, obj2, Py_EQ); + if (equal == 1) /* obj1 == obj2 */ { + indices[i] = j; + break; + } + else if (equal == -1) /* error */ { + PyMem_Del(indices); + goto exit; + } + /* else (equal == 0) continue; */ /* not equal */ + } + if (j == m) { + 
PyErr_SetString(PyExc_ValueError, "failed to find object in alphabet"); + goto exit; + } + } + view->buf = indices; + view->itemsize = 1; + view->len = n; +exit: + Py_DECREF(sequence); + Py_XDECREF(alphabet); + if (view->buf) return 1; + return 0; +} + +static int +sequence_converter(PyObject* argument, void* pointer) +{ + Py_buffer* view = pointer; + Py_ssize_t i; + Py_ssize_t n; + int index; + int* indices; + const int flag = PyBUF_FORMAT | PyBUF_C_CONTIGUOUS; + Aligner* aligner; + int* mapping; + + if (argument == NULL) { + if (view->obj) PyBuffer_Release(view); + else { + indices = view->buf; + PyMem_Free(indices); + } + return 1; + } + + aligner = (Aligner*)view->obj; + view->obj = NULL; + + if (PyObject_GetBuffer(argument, view, flag) == 0) { + if (view->ndim != 1) { + PyErr_Format(PyExc_ValueError, + "sequence has incorrect rank (%d expected 1)", view->ndim); + return 0; + } + n = view->len / view->itemsize; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "sequence has zero length"); + return 0; + } + if (strcmp(view->format, "c") == 0 || strcmp(view->format, "B") == 0) { + if (view->itemsize != sizeof(char)) { + PyErr_Format(PyExc_ValueError, + "sequence has unexpected item byte size " + "(%ld, expected %ld)", view->itemsize, sizeof(char)); + return 0; + } + indices = convert_1bytes_to_ints(aligner->mapping, n, view->buf); + if (!indices) return 0; + PyBuffer_Release(view); + view->itemsize = 1; + view->len = n; + view->buf = indices; + return Py_CLEANUP_SUPPORTED; + } + if (strcmp(view->format, "i") == 0 || strcmp(view->format, "l") == 0) { + if (view->itemsize != sizeof(int)) { + PyErr_Format(PyExc_ValueError, + "sequence has unexpected item byte size " + "(%ld, expected %ld)", view->itemsize, sizeof(int)); + return 0; + } + indices = view->buf; + if (aligner->substitution_matrix.obj) { + const Py_ssize_t m = aligner->substitution_matrix.shape[0]; + for (i = 0; i < n; i++) { + index = indices[i]; + if (index < 0) { + PyErr_Format(PyExc_ValueError, + 
"sequence item %zd is negative (%d)", + i, index); + return 0; + } + if (index >= m) { + PyErr_Format(PyExc_ValueError, + "sequence item %zd is out of bound" + " (%d, should be < %zd)", i, index, m); + return 0; + } + } + } + return Py_CLEANUP_SUPPORTED; + } + PyErr_Format(PyExc_ValueError, + "sequence has incorrect data type '%s'", view->format); + return 0; + } + PyErr_Clear(); /* To clear the exception raised by PyObject_GetBuffer */ + mapping = aligner->mapping; + if (PyUnicode_Check(argument)) { + if (PyUnicode_READY(argument) == -1) return 0; + n = PyUnicode_GET_LENGTH(argument); + switch (PyUnicode_KIND(argument)) { + case PyUnicode_1BYTE_KIND: { + Py_UCS1* s = PyUnicode_1BYTE_DATA(argument); + indices = convert_1bytes_to_ints(mapping, n, (unsigned char*)s); + break; + } + case PyUnicode_2BYTE_KIND: { + Py_UCS2* s = PyUnicode_2BYTE_DATA(argument); + indices = convert_2bytes_to_ints(mapping, n, s); + break; + } + case PyUnicode_4BYTE_KIND: { + Py_UCS4* s = PyUnicode_4BYTE_DATA(argument); + indices = convert_4bytes_to_ints(mapping, n, s); + break; + } + case PyUnicode_WCHAR_KIND: + default: + PyErr_SetString(PyExc_ValueError, "could not interpret unicode data"); + return 0; + } + if (!indices) return 0; + view->buf = indices; + view->itemsize = 1; + view->len = n; + return Py_CLEANUP_SUPPORTED; + } + + if (!mapping) { + if (!convert_objects_to_ints(view, aligner->alphabet, argument)) return 0; + return Py_CLEANUP_SUPPORTED; + } + + PyErr_SetString(PyExc_ValueError, "sequence has unexpected format"); + return 0; +} + +static int +strand_converter(PyObject* argument, void* pointer) +{ + if (!PyUnicode_Check(argument)) goto error; + if (PyUnicode_READY(argument) == -1) return 0; + if (PyUnicode_GET_LENGTH(argument) == 1) { + const Py_UCS4 ch = PyUnicode_READ_CHAR(argument, 0); + if (ch < 128) { + const char c = ch; + if (ch == '+' || ch == '-') { + *((char*)pointer) = c; + return 1; + } + } + } +error: + PyErr_SetString(PyExc_ValueError, "strand must be '+' or 
'-'"); + return 0; +} + +static const char Aligner_score__doc__[] = "calculates the alignment score"; + +static PyObject* +Aligner_score(Aligner* self, PyObject* args, PyObject* keywords) +{ + const int* sA; + const int* sB; + Py_ssize_t nA; + Py_ssize_t nB; + Py_buffer bA = {0}; + Py_buffer bB = {0}; + const Mode mode = self->mode; + const Algorithm algorithm = _get_algorithm(self); + char strand = '+'; + PyObject* result = NULL; + PyObject* substitution_matrix = self->substitution_matrix.obj; + + static char *kwlist[] = {"sequenceA", "sequenceB", "strand", NULL}; + + bA.obj = (PyObject*)self; + bB.obj = (PyObject*)self; + if(!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&", kwlist, + sequence_converter, &bA, + sequence_converter, &bB, + strand_converter, &strand)) + return NULL; + + sA = bA.buf; + nA = bA.len / bA.itemsize; + sB = bB.buf; + nB = bB.len / bB.itemsize; + + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_needlemanwunsch_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_needlemanwunsch_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_smithwaterman_score_matrix(self, sA, nA, sB, nB); + else + result = Aligner_smithwaterman_score_compare(self, sA, nA, sB, nB); + break; + } + break; + case Gotoh: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_gotoh_global_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_global_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_gotoh_local_score_matrix(self, sA, nA, sB, nB); + else + result = Aligner_gotoh_local_score_compare(self, sA, nA, sB, nB); + break; + } + break; + case WatermanSmithBeyer: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_global_score_matrix(self, sA, nA, 
sB, nB, strand); + else + result = Aligner_watermansmithbeyer_global_score_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_local_score_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_local_score_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "unknown algorithm"); + break; + } + + sequence_converter(NULL, &bA); + sequence_converter(NULL, &bB); + + return result; +} + +static const char Aligner_align__doc__[] = "align two sequences"; + +static PyObject* +Aligner_align(Aligner* self, PyObject* args, PyObject* keywords) +{ + const int* sA; + const int* sB; + Py_ssize_t nA; + Py_ssize_t nB; + Py_buffer bA = {0}; + Py_buffer bB = {0}; + const Mode mode = self->mode; + const Algorithm algorithm = _get_algorithm(self); + char strand = '+'; + PyObject* result = NULL; + PyObject* substitution_matrix = self->substitution_matrix.obj; + + static char *kwlist[] = {"sequenceA", "sequenceB", "strand", NULL}; + + bA.obj = (PyObject*)self; + bB.obj = (PyObject*)self; + if(!PyArg_ParseTupleAndKeywords(args, keywords, "O&O&O&", kwlist, + sequence_converter, &bA, + sequence_converter, &bB, + strand_converter, &strand)) + return NULL; + + sA = bA.buf; + nA = bA.len / bA.itemsize; + sB = bB.buf; + nB = bB.len / bB.itemsize; + + switch (algorithm) { + case NeedlemanWunschSmithWaterman: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_needlemanwunsch_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_needlemanwunsch_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_smithwaterman_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_smithwaterman_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Gotoh: + switch (mode) { + case Global: + if 
(substitution_matrix) + result = Aligner_gotoh_global_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_global_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_gotoh_local_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_gotoh_local_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case WatermanSmithBeyer: + switch (mode) { + case Global: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_global_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_global_align_compare(self, sA, nA, sB, nB, strand); + break; + case Local: + if (substitution_matrix) + result = Aligner_watermansmithbeyer_local_align_matrix(self, sA, nA, sB, nB, strand); + else + result = Aligner_watermansmithbeyer_local_align_compare(self, sA, nA, sB, nB, strand); + break; + } + break; + case Unknown: + default: + PyErr_SetString(PyExc_RuntimeError, "unknown algorithm"); + break; + } + + sequence_converter(NULL, &bA); + sequence_converter(NULL, &bB); + + return result; +} + +static char Aligner_doc[] = +"Aligner.\n"; + +static PyMethodDef Aligner_methods[] = { + {"score", + (PyCFunction)Aligner_score, + METH_VARARGS | METH_KEYWORDS, + Aligner_score__doc__ + }, + {"align", + (PyCFunction)Aligner_align, + METH_VARARGS | METH_KEYWORDS, + Aligner_align__doc__ + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject AlignerType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_algorithms.PairwiseAligner", /* tp_name */ + sizeof(Aligner), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Aligner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)Aligner_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)Aligner_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* 
tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + Aligner_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Aligner_methods, /* tp_methods */ + 0, /* tp_members */ + Aligner_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Aligner_init, /* tp_init */ +}; + + +/* Module definition */ + +static char _aligners__doc__[] = +"C extension module implementing pairwise alignment algorithms"; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_aligners", + _aligners__doc__, + -1, + NULL, + NULL, + NULL, + NULL, + NULL +}; + +PyObject * +PyInit__aligners(void) +{ + PyObject* module; + AlignerType.tp_new = PyType_GenericNew; + + if (PyType_Ready(&AlignerType) < 0 || PyType_Ready(&PathGenerator_Type) < 0) + return NULL; + + module = PyModule_Create(&moduledef); + if (!module) return NULL; + + Py_INCREF(&AlignerType); + /* Reference to AlignerType will be stolen by PyModule_AddObject + * only if it is successful. 
*/ + if (PyModule_AddObject(module, + "PairwiseAligner", (PyObject*) &AlignerType) < 0) { + Py_DECREF(&AlignerType); + Py_DECREF(module); + return NULL; + } + + return module; +} diff --git a/code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd b/code/lib/Bio/Align/_aligners.cp37-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..26d918c849ea89c80a8a3b30202a890125f712d8 GIT binary patch literal 146432 zcmd?S3w%`7)jm8)CJ-Qaf@W+~l(8LcaIB_;wxPkA!HJ&18H|dW3TkW+gIcAUAhm$8 zlPKfKMA{cywYIg@x3#VPOJDgTUaAuUB!B`!tx#`4txSkYj2Z%7&i6cPpEH--0BgTr z-|yr1BXiC^`@Yv+dp&Edy?5x+WsWR|!{Nk#({wmib+3jFPK+d!?OCftfJiEm~;C`hv{8a zWq<2-eAzuF^9YAyJsP^u3eCrFo;=qhR6S=p93vzs`RiDQKz1trWjd-5RVA{22EMD1 z`JZMus*q+Yex=CynT|eYQjzIs(D-2etIu>4qKK+DGaTdXut9$7@qKw}cn9)Fx}pA^ zR(sXLT6}zSomFMF-D-!O#*Jc6yK&BywO2YE?yn*gc{&R5yAi)B|H?o{r>SU;*f;SZ z4+T%cZ_j{uvD4bdjg++_708^S1)2Z?v%O*dBKlk^?>P~v@a zuy|KpcLO4_FCF#x!?uUnI(D9!;pnM5Qg3O=o4c>YG<)hY zds_9D4SAid;8aM48WlEftDO?wlTlm9KSzfTWYmscO{zj+g{|?QW;+~R+3{-pXscop z7BIH*x07=ygnHfGBCn?+ksg`t$zgl)^vEKQ+jlVvaO*}-(0IRiLwp3Lc0;UgWO1xB zjJ@J*yK{4Vo>Wx3yOlI$Riun{xB`@_8*BAQr8^iYD~MEJ%=Da#FOfkj3mDzT0SsH? zwmNN!tK(~#j$1iy|AY2LrK2Hal!))t!>at1PvMqY}X2;*jT8|n4-8jB;a|TAQ&*;&OPQCHvy6os#8C^$m zd>=(W2aPu4@4E54x!vl>>o}sd9;-JK*Jj&eFU2i>!D^Q4sw17`t~c&Frv zFQM7-{UH6uSnbFsNQTc(1o^*oDv08bc+(9Kj~>Z>1|z9Q`5ca6re@FJyOI5S{zS41 z5S=m0@H`$tZP`8EOb1JC%YFw>oQrMQz1Guebc#d2HpY4O9>VlCX9i17h8ttwJ|MMp zy6wOW-59&m4n)Gm&tn2YU^3CJ5{^xo;*@2&vD0*YNgDnb*Pddtyf~&uXOeerHeFFH zkvbYXMGw#Ml%NVhPmONiYtU$Cw-kXQ7&xCVg~b~8Uh=o{GT;yz^Nkw(EGaWK^4n0i1a{MxN`zUlA7HtV+m^KZS?Xw*JL>?|c{! ziq^GN+2pkJ zamwX++}IF)*}WBLK-p!jPoS-iIK?V7h3j!a&}dm5y42yAJ0<%MyG^t1hkAAPD|o<~ zb+7#7=G``hLU!F1tH&VS+$p|-x|u=m8trjkz8*clK+oEW#}Rrow>}un-Ka2rLh~qIo1?*y^g^LmuHvchi~yX7V~K|h~8Y28LYsD#+a`8 zH|YkY59Y@?75_aM$-W)O#rEwm+wq}Lt+Q^6KHPfvsse}p)TH0O{+dNH1h%k%W& z7X1m-0{@WzSMXA77gpMe8n=M8zVHzXnGIQB-&VYC2v=8n8W-NoB{tGA+@_;2u? 
z0j8@CmC%h}#zDq<^zI$3gs};n+H`fT#TR5U)#lLA4*4pPl3cn~k4EGCKt9bp`QYXv z_&?0RXl&<4aPkU0(#T>!uTcr~t-gn)%0kw-l=Embt7WBtTv7#ry21r^gWs9tF{bMo z@WY$lH zKwc-Sye16D>-dA_WuCNw1j0P?D2^A{lOTLm9P_6bYkJiO84e7sL;J1YXlct1;g#KV z+xykNkl$}?^c&lJx4nPW^|kYSxAn}s{;GiQw(jd|uez>$%vQhgvfs#lY+|OvciRWP zy4srZF*}Xz{xLgm>pADD>+6a)7=8XR+k|el-$u&2{l*4=B-CfLM=tv?a?vN{U^y4) z;j#Y&f;D~NcIQdmXPTNnl6@s|JR>$D!%&c0>RUL^Km-X z3sq5Fd$9GUF-V?)lwU>4axnX2ZrkP1y0zQ(JG3gtjr{7w*HQSo8f%9ma#05=@>Xfz zB2V!KsTyk(>O-X>m&LUw-y9`5W+Kgtz_LE=$zAz3UEh{ngqpIi+Oki<6V?p*blgoB zGRF9ug2*gmtKRa4TOYGkk6e@oE|TXNF?B@UBqRHi_d#{-Ic?cpcmRz$dduVPNcKPR zR(n*q-QAYG4UeEGMkp`_KS<>p7BvbF9Ed1U~r@CnhB@d_L-Eeh%R?5fvCj zNH?=LAAuz_n%#yUEOa&eG1^VnIg-f_zom!2_9uj8)Qv+G{wR^g{sKQ{MUfu%W^kc6 zD~AGeMG?D=1(EsdO7>ST_`on}4BMmT1;l^Bsu%fY`Y!Ta?7KvdUgy~WcG%6v7zYBz zj^b@WV?Q|4&;8M974hFeTqg?(?UvV{m>{teu@8((uepFIk+$*9i!hX(xLSc>I zB&O?Zj4XH)F&VfNDq8+~{a$_F;zxs6uZ;42@&X9eUf%ew*5E`GU-;b&t>GuAn=kxn zM(udW;a4IP^!9Ia+1sN`*C@PqmLP6pZ|%3kpJix`Uc6|l%CAQ2bmO9*1A!KbHX~?+ zc840@svRA_2wJ_)3`C31Ivr2@kpJ)r9;!gJt%w%C6#TGrJ)RQ9J$=yB3!X%ppEC73 z?RX;BAFjBO@rz!9^e`v>*<1Lz330_9>=5+3Y@E<9 zSZW&mW{*B(`n?_Px9E2zi02^m`w6;b5c>TMG6DVGH_D>l0Ny(bQ|b4(gVOIuw}On` zCqWs|V07+22tkdGPyPj>#hBnuO!Q7FK|ofMWV$?p7tRw*mwwaaR~XsOJDFGQR5+L@J6;{~CS{La%S1jyT!zUl$Lg*I7^F zlSQw0;5m(6zhi#{yFp$^!nWOb(-mF#|j_r zGqr}N5U7WLlm}LHFCLAv+(vtN*ZA;$XWhvUyK9emxa^4{gs;}|M=Ngu<|)utU;4I&^eiPM=sTrJdSk+ z%MVp)V5U!_&LGBh5LSel@?EV(R-BZ%%g2P~3A#Zjoc8OD8|uEPPnn)m`z77jXu6(z zGQ$x)2kMeq!HPvJWVGpe7r|EP$q!h{%8FmsBi8{^7}r3rb7_u0de&6awGXkeGB-kY z#auqycawE2ZrWajF_Dh5g}+? 
zUWm2o{ksN?nf1bwA{|BE@@r>FdeRxIGsOvx`vYrMC4H_fFeh{VJEdCe)i%42p!$Qdv z>a*)XLsA#L?d$?cA6WvFa6%`?%XIx5pLOFLOw(U7kJNceUfg>Q<|!3Ort5gbl&N|= z5*p{Zfya#-@>=4V;f|5vI3$kS5g!k=fpA|YmqyntG^O)(=F;M>{c>+hnNOy#Ok~}7 z(a(8oYzk(z#XHgCU9~~u98mR(R0Zr{K=nb>wSY{!rlzD@!2mDQeJQ;>7KW0#oGFpR zpaY7f=jo9V$=Pr9oMXzt~@<8|ZiZnOX@T__{tK0H$Ca1sZ%>ln20*BA;6-ouZx z8!@7f;z2QkGTo3_nwY#cTcB*0bKxZrL&+qeStf%lFMbQT1(0npa$fqe%BnmKb%NgA`IQ-#fpKp&@gq!Me|t={irL>H6C$&M=5|eWq&} zQ{jsdH(d>Qo*vEpGunhO1Qa78rO)}0$jJ@Hn663$b-us|P@fkCj1PnOb%!8oHvbl( z+5|tvm~(OxbOVD0-l5x?+})KOGHUk~1dLbo#&?&@PTH&b#RvJpx{|*G8dziI>jl2m z7<*(2+U@zjg5hfmgzlPI4bHAt7d@>*c{#reREwX;_`e`bYp9p+>jR}9X-zfMgn(;T z>5(yd+gU>R!N|41YG_6ff7~3MnjroZ2EwNUSFjc6CE^UA_&i+#b>kA0vBkWgC^+RS zKJD(cKG;!G#)8G0uCq{0DXi-O49qY27InS$83REP^3JMs{a8^z5~nM(84L(87QzHW z?c6EIVV26_9oVxa2fizs0Eblybv) zJC+!;cAOu1nQQcLi&HP%zUa-a6==YHtV+~%uH686=YSrgYIT=A734RG&+Pu_PVbL% zRd!9ESpC8GM1NRjnY8{`GxV0VV$YQbT@62x@DZa@FGC;OMuPq0YeKKk9a!p3xKYzdc+gQqYrBl;7>-EwG&sMZ#$s?jP#S^+d z2!>aQR#ZdZ+Tg*0>IhYTR963~7qx~S0Z(7k8aG3=83YT}1FJDg__r6;TI8vK1ji;n zBH0Fv3_XI$I5HRk1!Uo`Ub<&dK#!gWcD-NsB7O)$gcoXKYd-6Sa|+m74!9B%`udP& zy5S$iU%h&bUb=3{D^iY>ErkV<kr3!k(9&S&Fwc1t^=lp7l%{RSh;JMyS?`}QP4$?PxSa>26!!aNW3hQ0tRE*hcBfP-*w#$)p_?Px`+HzjK#?cK_+{)q6(l4Vdc@XVH87_5i*yE@>kO zT49=y(G7a23U2`u@*k5EBsm}$T{OQ??cP7Ol*QiGne|%Uf!8KNE`13Jj(KE+WVA3Pb zQ%u*Jf1(82Az$ypSN1Hci*eXbQYFO);e=3#c~g)BoXVEWnV4gA0YPbZ)gpibnjZ9C z=K*VbT1aK?G94oy!#CZylO04j{iWzx=3MRi?hmxLdwaC+-dNM4Ro+6Dy=JI9DK7b&Vp&C<6_+**EM>a}7q& z9T~q=H(@};$AB?1HRs99xqHFXl$^U4Am`7JrJA1fJ6X@1p;t!kHD-%1GQI)Q4RWg? 
z7b$@ZZLSN1_rf@}S8M(=^xEuPHI{I7;oNy?!I2y#$<+AFcn$Nv%O%1g3I=blK~t}2 z#T3~=IeI<>XJ2IUvu8ynV-YCNA;C?Ju}SUhe8XR;s0>wdL&~bA^&}JY@dwqV4L8E6 zg_D59!KyqUse%)8usCS})o{>Br2CVd?%R^?e#IY+pcnD|TF$h{?N%1u=>9%F(P9a`hXSage(eg`!Uj{l?qm(-_0| zSR~-iXOVBQ!;I~L(w*8}R}1OR^vs_gbw1yRm5t0;XclaP9UU+XQa@?;m{i*JY2Ur6 z1bth1Q&HCfKUy&vqt;g_bLVsOS;!G21)n(l4d6BjpmSu(O5Jm z2nw8^s3L}KE9wT4=9sXyHUWYY<>g>%<>m-^e>H^&+ZSkPK6(mW!QL_6^O@_WMaDlH zFxvufM6k0!>d8#qy&yGpH&Y{BYU;Hh+lN7@f-X99K(v?{@N805f}?}hyb7e7LanLk zyK+^Dib%1zLBF;oK|%9=pX|tw;GS%ACd$<42*q86D@~6qnwaN{20b9Z7dTsM6zzVM zIgstGLULgT31*B=^f0D3=-lZu*QnggmH-wGF?z2P!AL5? z#Yt$Lf{oVv9c00-BI`C@gJxhMv>Ei4G{9u{|2IsU0JgV6umsRwx1^I@uA8n7e4qeZ zp^_X6xmFUi4@e-2gTEj+CG$#&d;fsA%)HWcg&F%ZC^Z&-MK}JwEMVk%^ymc{W}CLk zKic$6$aOEpuU_3o!EcvO*npqigVqO1yP$1=NE0+xdg{@pKos5l!o|QB7-Vt1=|}j3791(|$-xq3Z4a=nWGn5XP*8K_d$!(1)cvzXVVxIMOsW zhIdWy^|rX{vckJQ!$TP#J}qz9un@l=_qJ%YT5n5UZFc<|6S_v%Z*+Iz>2+hXZ)rZJ ze%7SilKNc}igNMXBw|aj^hK@VGL$Z&N{iNTKA#Qe34Sgn@A-}18Ag8L3}X)XxN#)> zh>`Ms8A0exm-&oV?;8Cc-No}BhtdE`YF53cwar> z&CHal3D;n^_`#J_zkCb*kw#zX<&|+f2I~)BG`%Y>vden|{@ykei>Lvey|uKt~I ztKAv(Hd_Gbv#E-58dCeo`atjyh;=Lmm_}qZr7c?Xm&N&4p?e?ZJy=Jey49XqGK0Vb zwC0VUBzeZ%a_zKND3WoSs`*p3G^1-I2E$YDojK8}`n|rdwCKfabP$`jJw;#V3%6tjs$0rh-0+4+0f`;GQW11&kwshn`0}fIW_a2NVer3lv#Ekg{8KODN(T&E6sJ{AUt07YS%S-U7`P z0nTX_;cemKP>&3C?+XH+|NNT_qGSQj_u^TAFfcQtf=ez%)S!2lg3Z9n%a|15Tjf_t zpkge7N|S zMSRitxadCvA5W{FQ*5H+0ZzOfd0_s zI9|~Y9G9iQje+2}1egOna`GX8V(B0HH0A@YAAhXh0xehdkb%z^M25|iTY37E_!SA$`fKEW!+_`(SiYDE1T3EnuuOUH zKLsy$pbJ?a3opY*DSza@hL@);%_r8Ilsi)JGM8!s2x<^NTfc!Q@Noi2D;l|B$JkF@df$TIX$*1r!g-~C@!*LYH@2E1%n?2E?B zC)wq3t872Kj53P{!^3BK6xOX-B2|k0hmv-p`!SiEn zY2aB$V>{nMeDbUgB&5Jlk@`~El}YG7_O+jK>44wMtMbsK(0F)a2Al=q4od$Th)6JP zzpQ-JDTG&q;BvD>a4;5(+H38;p?2`1fcJG6yhGl70qD!9?}4HTzF8Y;zizrV{Dg&R zeEW*&dJ1o7CCu3yYV%FkBX~`*Qmr-Nzgu8?MeyxPH{KpDfq??Q zD+=)oUx{AhHdYj_5-@vg0+d-nqJ+~#W&E}==x>B{lM=YJ|JY4ZSZUcdp7xNYQvD)e z?8X44_(j0ZgdqAwu*0;8PfJINU&MR_;Gs!8CIDYh~; zxDlKqRQ5}Q0Z7#(q*;JCyQNUsPy 
zu9h+^vk>AxjQ4~-AI$SO=zok+2@~ucWsvwf7ioCw>7CC`nz(V*u&n?3fBSKiWdePD6rv%LE`_h z4u}UqEePVO5W_E+;2t_qRB=G=LBIe93dWkdETDXd$kLMj6Su;DVm2MbI3E86}OBjG>MhPZ?KPp~c_qYv~_3I7SGe~yDP z7UCX)m5K=Dcubm#NCnv56P(yu!!m*;w#9MRed${U?rq|3vS>{U?NG z!HhY~hvh$UI`pSPy0@?m7U@1^`%j$2z+p+(YxP&-9RvI)(AZS{keXI${u8XNO@N2y zKhc}&Kk+zxzWV!5P`fmw|3o77(EbzBfkXLESm_h~6GEi@{U;{gp6tk@T#Dr{6{0(+ z|3sBi4()D44iv@ycIf^STK`He3(Ebx0QW3aRD5cQ9?mxkG;9+EHu7M|ou)}Z* z{0}WDoW2ybfE6@qrua|Tjw-hMrT9<0U@3^WSkQlhVv6#gKn{UJ@}D>%MLQ(oq0-)9 z&&bCK{Xg_Kk2VaZAF453Kf?!=c@#fV^g{sV;-t7hwe|&Pd`L9q5LU?rXaVGhN&Oounp}`!Px2k z6tXbUBWX~7iuZ>1r|3qZ@TaK2HYAAq&s+W!-$461%Vd}>kP7dBgMd`!QDKABZxfx` z9u*sd-c5j1qTe|ze~J~To${yn9qVa~2^g6n<2ZU#z?A~WIfyGoU&585_S~rR<1|-_ z?T% zfchVN&b|`QMAC%G0;}O9l60PUBh7h&n7W-k2~n#9zgQpA@pRP?c>0Db51+$>z~`nPfsuobwz6bvD-=%)P4}>vi&!>% zo-dvZhvi{W3kI7GpM{DPe>znBEdCW$Fj6;S6Sp9cKGxkrAXB7P&_h4Sz+n-{9~gt+ zIU8yE`&S$ecplong1y-vJj1`@@WAt-`&U@tS@~D2w8ru}&!PKQ927hc=U*}JhpImt zE+)?cqY^5Hj^qLK0t}t9^I#C<{B=9s36d_|zaoL0tJ3`|URj9363F?jbpHw>=cgos z(lXlMnP)5>l7GcO@O+JsZa@DDB><>Nb12|B;a}17CyR8?VB0OyU1IxJ#98%WN%uX* zAn8sY;9o%~H8_55#}{k=0qOET3_r67pCf}rZ9$WnYP622Ae*Z~k)t@DrzzF5DC4r!2;X?~2;ERG1^6`w`GT|3?3cZ`f7) zGS(N4Q$41q!q4cHFCIUy`+?OJhv8pQV*6K=)YA_1{~mrGUxzJ8srwO*T#a6$cg4kB zegGVgwKbs|fq>GlV#iSml-`4;c7DW9;8?;LAud#;zUc6*XkdrXvmz`H(A_Xaf6s~< z@vI;e7B4WIcA+)gC0VNj1!@=BJwxw`xdHEMI5Z^S?Zu9&g#J+ZSCpBqdGlGM_*YCa zU9;pZSNtpBZV{AM7`I6eI12zw98$Tw-NU_Gk1M1}6Sf@jI3DrE=jJZdo_!W1HDn^I`vf>vm3l}cMpI?NHq)B$ejaU4H^n;f`7Am<`=JkM?KKzc!Wa&`pJ4bPuo zLB6N)N9v>MvFdp^E-aU}^Xzh*En&Pw;C+4}yF|uG9Wudlco5Qu)c(jEk2`+)9^``% zF3hr+k0@ZqdLECF&;iBSF>p}$G=~x3_fUiPVB`_YNn)j^67O`UD3jl(728{fpUA`cHN?H+u@xi!~AD^UC{Wm6n_13}_iv2S2tCgKZ&VK-DqO`@DD2L1PR>>0|UJ&aG0*L1d|q)JVNT=lSq)td14LFkvdx9DWsuxyhwi*LymuY zvFQ+s7oYK=?C8M`O6>6gzQN%VCBf3qw5Fd(f5F6$ECbQ+!g&JQe0oCO{UPuB)KVnv z$(YU)f>E6O1AUwfzX8Xh=aCOw1@Slj$z1S7>~v=DoP~OTF`Qt+sGi4@mC%YFZ5%d} zZq{jtXG?IBr)r7c=tzje=y}whW?L4))j+%_vVV^L5AuW~9%w+!=^ zHm$h~L*+LxG3=JxeIx7?y^6u=)0&PIEb)jCIlT(dO(f}ncaNyPZXx-2ahNOYHYf(? 
zz=gbPNY51kZx8ie_3S*TlNK87Ap`2KM?}^PVrHz6F91);@s`b+Z4I6W@QIb?DY9k3 z(tTPJFW(4N9}|k^=LfY%)uEm&l$9k0+~IrH%>*rPW@5LUNzGaZ-dTgy49EKpd0*tI zK>-{mh{YtHsgzkC#xwkJv4z43E#ar!M^5Hc2~x_%E=H$ED{O%sxpEBx@@W*@%0o9c z%H1S9bfZN)V8mg=K6GQNcFtB1#t;deD3C{OFjwW|u^=nF(HW@5=4qwZ3uLu|XmVT5 zHT-VuJr15qYR&~;Op)NgKRV}eQmfM*)XY8-T+9fbEIazamQ2@riGriM?Gx+XfWbnd zNPlxTSOpxVutz#ze1x@fKV*z4IXuvb84!M%Bp-6dWMBwx+4u60r=A^$v6phNVKlLE_N!QjWMsX0>~u^et|ahN zt2vj$tYh;ZSD(nK5BqqwWwmwWL-*BIvJYVt(W{jL1+iABRG283rc*#!eq49L4gz^i zZ5M*Mn@Lk?>MzLRTw^8*GjMW~|J%@{{6ONQ5LHy-ph6%V4n&#Hco$TK1&bubvI?xd z*t>Y&C^lE$K+va~wc!nbHh6wB&QN0=BnJT#y(7k+9ujHe$Zq3!4p%q^K= zA0;UX#2{xbz*E=GlDSM~E(1sjgp`_#&>=}F5RT`NoN%lcd$RNmt$B4CJ;@P+Jjl+X zEcivZ$w;R5r4dUjrnt~p+gSEr5c}y#U|U}qEdj9e6JQS6)Ya~$7Ug@Qo1%}x5{5qb-0 z|F=q%HNxWILA=xiO4n=6Uq8U}6X1#|)B<{(Z?A9mT7_l9Y(ck`u!j0Qf$&=FoNXi9 zmo&00kVfGES&uqs&6n>_teE170;vHD1|+WQJUWIhZ*n(!_fz_?DKT;^47!^!njQkqGF*%SA?Rc+_;Uq+B5Lw0ZkI7+;t#W2D zP;v>hyq%3z=m<>xpV_H@jnt{71S0+;@|c1Vaq-9Z?T8Ed6~l-Hlw;2zA1eyrKt?Z^ z#;dv)6C`&6@DEcOXwe1o0IANU;|T|y=+Rl&xlR#g$=^7>MW*XCiSxYaI!PYT8l3-) zM^aZi`zAW}(VydR|H93dJ@udX*tuTnG^pGvhP->`5JSO?I#7UpTRL2cU7_d?Ai%yE zG7f~iyCB1&^I$%sI$y-plyj5LPOgW^bCYIqA4}A=+ln46cP|NE%f&ROMS_5$kj8H4hWNC`Gcz2B@SEm?Gkf{60`B7GZd2O52~x!3!mJ zTc7ti<>CF<>f$%nnJzz@VadZciNO_|eD9TP{c!T|M~uPNpDAfn>j#yG1xyMYL|3H9 z!(bSchi%b!s))Y5kU7bLVrb@cG5OfV^c$a*ABh`y9ZlA^ELQJ>m>O8x~) z0^h+3Yzx1q9=h;L!B;pbxZ}f;i7g1@5E`;Hve$$^H7Eq~ZoVb&!XKCNZl)4$lJagQ zc$t!SG7wq-*7q7)9@vMjI~-9LlOAK9D(bQq6EY=)qB!1@^nQr)F6EW3 zpIGwBqLUMPm$7`FADFZ0dUY$-7dwLy~vBDWWP4lo}xKqKp3> z!tN#!PUg@x0;rznq9o+ivddKS8vJ5oQ8UMqbgPDybVt}JMp@0`F7FaAZO66^!AmEw zy%zn1P!o}NH?WSw($5ygApIO8X)O9FORJYiYzGx~fs-s@w;ve|5O#%(29tD=#lI}+ zK9eLS3;#lxC=<#WLd<>TQq=_w9RUBfY)?tIYIsTaAv@i}NGECs3?4*XI}1vXwqTno z({>=5CFm-4EGLCBEtVB72I+!rHOP?)mU_mp#afYv@lqvX?er9}b}E%pOA&V{$#&|X zlC4sBh-8b2s5Rg5A1~IP^j&M%PD8Cz1t*AgU;&gWD}^|E60UGr*<7n*r{|iG)<(u6vEme?V+{fZYiQk3C-!&4oK;| zjiG@j!*jyu+(-H&u9Su7@!A?vKUJ(Sf-Jm1%p7Kb`$%7>sv@z8wBZtIId_f1sNiU+<4a+k`ND*@nBgssN|$oqey<0lAe`9 
zlTcx$#OK_iQlD^hK7Pfjk*6M%Nda3%7vfzm0O)@?5O$C11NV!H`GWgJmm(dF7CZ2Z z(_snNwlb@y@rNdX#QE8HR>x)+#|SjOTL23N!XBW-1XnLI>`J#0p#l{08n3^BDTkpI zKxr7#_}mxB9+Y-V$o;|swkc`Is7F#5GCs;dTFRho5)Bz_a}Wm)vkz#mH8NuZmnqsZ~=RY^c*l?oB-Dhc`+qddLBSf$lK28H1-*iigzoIw`TeP zNv?y6G$_BYuM$DL_P7>c17VdsU^0%>Hi)7H6OJr2S0(UNsG9eVVVr@&n(zxt!^8Mt z5ZCjHV5zA!T_@r@W*dTO&oE=OQp6Z1Va7oD9MJ&IXNIJTYgdtB2V$TiI%n`bct+++ z@dJqjnX8?qz;n65s8jHqF^Wy$?zg;L8R wMX-G9JDSqhCtMk%JjAg4GR*ZI#Ar zkM`lJ=W+hhoz~vhQBVf2?aI)G$P;d1;D8_vz~>xb+ZcO3CwPOWo=pU~G?Wdn zZS1K~QABNQ$;8Lnv3SWKMotZh(PQi}&>MyOu}o_?Y4`xl8v&TO?ovwvS28bvpS`=4 zVjjto71)6aaHyf5X3r0*pBDSZ52}=YTE`^$IBd!5*RvT`-B#N6jbj)%th_#fF*r%; zC5=+xTTMz-4um3=Q=qTH)+E~sA58F}jU4C-IZYF1YT{lT9IL5%`tWd{m^hHu%4DFC zBUBpKnp=~iy7Fd8gMQ`bsg^cW<#FO-#tH6IWIinP8)K(D87XPx;EYH@KxO36l17dS z(3xfASTO^IrV`hi=|&D7RNaI)xWGqfZ>??Lz@=cs-G1X^qswo6V0>WNH?(y72AB)d zVg}k{b%^$jdA5DyP3-y*ymuNKY0*!uZQt0*>JLjluQLYer%ckI%Ed3IRVWqgttN$A z$Ew@J1L^J{mJRfQE}ftoA0)|VFua$vY!s$w9_+)YlevKSe33=}7vL)0woxUvji-}j z#MAV!a#z?8YzV}+C#X81AseSX2sGqs$w@4@jI1CJCwk)i>okgd%ucphnKsD91@A#? 
zfxU@Aj}CJNX)OqvBd1rn8C^CwUQwNoDLK$tRW zNLgGhY_4uIeEa$jtvTO9ft8piATfYOtVOmMU@ujNmajrt9tL zR?HEIDa{dUhs4A|16VYBtg=l2xwMh#)QG6ci~yM7)m!@3xM5vKM-sLYsdGQjDT>CJkZ*4DV~(EL#MH0 z{Sw^OhTgZv0nlfPM8!odXxsMnx!Ed`?N|3;S% zCgAR3ejsQrhoR_=uy^c+hgQh@1|(hd-siA)yuQe?cU&N~SGv%9Y4}f1z2SO~OR1bSVZ7IQ*QMV&Kq4PCow7Kwp#~S!ZB8L3dWX5 zOzyZ$^017mDfXx|d3fBR$-{?Z<4Bi><+95GPCXl({p8`Tl!yN@v^*@LFr?#?``JFs za*RKE7;dK!g}*Vt_VK!WS00D9xS-jwh7g6xmr_OHYcLvE_ESV*cD*gZP+a0-mcToQ zEDwwQ18NA7XD&aR^snapLkar_1BadS8e@?D=S!OVWfdPp9%cC33@ndLtcsS!nx;!l8Gng=(G=BUm z!tk?6GO{4Dl7~T4LkPpK`cw}z^g(Gr;)9|?krw0^%h6{LVfc4;vKBiTy?iXe*A{re zktuMJT}WmWe7Ot=YZvi{{yUTgV+ggSUuE&2KaVW}LzBS>39fEkd8aiOrOUiJ1(JHk zw6tILTGU_c)dS%{#9nAol-7r0FYZeD-v1V{_n0zk45wgDC9M>Lh`oEyv|?&9`yh-S zp4j`Sot|>guwrkc9rM3sJO&Ya&rKJ5ZTmS*1GqPt$tnI`B@eWR%)-;K;_tU5D#YKf z%Y%r&C*v_i{N03IG)bSI_wlilOGAINh|=*u03?95Uy5KBxm9w8S!9Xi3(1evyA@fa z?F0!mzNIxprQxtX(kyZdS5TNsVzpP>`dW(q=Ld{W5|)u-_}ad{EX6c3?>o{7mT6>` zyd_K{LA(w$ji8dSiWK3SSVg`n56|KLPdr#wks=H|ts)>A{3?w z6e5K-ap67Fqf^)Aa3m6jkY3yV5rVSCzCQGlx3Hh&72bke0}ab3Pa!p|1~@1aJjJ&6 z-u3_jdEFF3xRRKzom0_|SjW&g-6AM$0_fwpm=F^SkcH`b+=`*w_KVvI3*qJG#%IK& zWyFC;F@tm=$$_nBt6ti9^C%trLBfmCL4l}qdngJo@6tH0VL5QqwR&kcpEllfWx!aAlhyQl zztWnxbH%b>^gAyEx2f1yuE_l={q9^r$a7EaV9QYQTE#)ZdggZZ)LJ|-^$u0|HO$eZa+GemsGu-rM8f`W@6}q1pH0P_U3&p zh@?<`oxzUB#V$&p(_Ppq1 zZ=iY)_f^!R8};h1F zJxb5oOw~=WqmQ-VWa`nbXjdNcxTbK|0v=xZ91km_XPCO|EnwY1nA@S3DrTZJTmc~= zP>le(ER*QmOD3(&%zg%`(CC^L5)wPI<9UTMj zoPUATFU+(-GvH}+}UHbry zQ#Smcg9*et)430*N4xVgNPk*mAO7MzK*l5^>BK9t*n=Nc$gfdIxv|D{?O_zlUK^Im zW9hg%Q9>WD;!G{!fVztF7go-VP(dde%uz@8N|!-RYp$1JK>M79(s`hp^;+RR1WYc} z!|%ejb47Ouz1I9H>Sgz_H;1Gv8wD^{uW?}5&pWC8hHhO(17yot3E@ytH->oJ?$3T|j&Ahxc>ta0UqRCy2560PeT4rz4Mr<8N=JTTCPqBG(pgOMOEbsZLOqt|Z zDO{QQu%Y;I))&Z+F9G|-g^GzO&~Wx3{P;e!OZaifz$Q|~lCfUcEbiaJk1s|&2It4B zzsU)}k}(d4#FD>(v9RlFv*eT=nM3g7zsi8p1wS~~;>XVx+pIF3A79PzO3)g5KPE#0GtNn5FU&agUSx|IA7N$NxJJ^b)hEG>(OHMZj5~xG ztKK~Y%y|4M91QYeVaf?+>=xOQRI6Aq2-@PqN6NU&h)gCIJ|%|_*o(x?G`M)sTx)aR z$^_{(=i(N^LAY;BxGye$A@@Z)lHAuAn)}+)ZjuG#sjG7_)C4JH34b 
z<(|%uF4J`%UZFdrppHYeDJQR!s>o0;mbf$`x^9$5DFFva3kbLZLBwbUs8-|4b_JQHatB)QsEMp-Lt|gdezx*|uO@@Dx_CLtOdWxDdM7!@`|d)R_#Xgc~_AX*+CQLNhYj|X*tI3Cm;N4U4=G*wIj zqWzBr(Uu`4hqxBkT%-Ur*+LKU1P)t{qzQEPBO8bQk=k8dvq!8v#!l6 z?GZ!KI=M7py<8fw7J_vZNG1YG!$nwWGld0a;Q2 zIQMd{%`IMXbvm}n>VGfyUm!k3wVPP=cVb z*@;551~LO#6-FjX7z&MzwQ_D$yOdN@69NEW%V7%u(*Nm@OvZv@fS=YWOAJ8XU}PnS z0>rp(RzXQZ?-mf9&uo`@V_~u)`kho!A`nV!-eci0z9r|Pnsqv^vleKEO}Q#09ioAB z^wKq2^Tn2sz&yD0ljw!-l8P#l@<9TkojWu{yJn(VH0F0USP+f5SrDxjK?mp4-xQv~->wfTMFljaFvEHcp}WGL@1H8@mO*7>0cfYCtP47&a!B&94Kc@g++suhyfS zq~hj~FxweY)gf8-5ST3{1rf8|Q)ywgF(;>S?Ne;7{n$yvaqW-6nYG4`glqqWG0C;x zR$O~`f@{x0A;PtP$0(K#?F}2You4RSDA;z1m2>0&v0xhq=yP*zr9-0|IQcAkc_Ojf zL=K2z+F;efv=zSwQIi$cSdgus>TIH@&d9MVv6(Fuxe1>Z(6+U1;@T&2KY~co)(Zc7>K)u z-vQ!EP~Cwb4v^|kHK1YqK30j}p+H@~^VG!FHq)}Uv4<>Mnr(0k8l70l0!ExhwJJeq zK5#x|P^(w*ACMn0s4c~#=~{XsdWa&deZ+H2p*7fTtuJnwH_tjT0AR&p^j8oqh75S` z+zR{(RC_=@s6&8F*Vn8JN^F?7nXI|-U2W+dh{#cQ(|}sWAvO9S)5(se5O(2syRkKh z_y-%~cDqx+T11#TLSI0r9#LSe)((7HMO9$!a)c^`ZXJn0(&6SHs6o(8lyw+Fz^3Du zfQ_CAH@9rS? 
zHlZ9)J5Vtq;j{?+jZz4Q24TSINp-TjpsuQ`(ho=?9TIijccJAFGnV~m*&!*r z_kRW&o;1Z32B$PAifB3xw9cU(`yUABUIC_RNng7khM|`r9z*sl}Di$S>$pvycjE9fNi< zxW|4Th)DqH?^8Ct{jllbI68HhlpxE#zCd**j~sfR)*F<5e$)Eb~xGo8c7P(8h2wP`JW#t=#Y&QG!OCp9IuHtTVdEB<3yWi&JMe>B7~psY zd84p`5b;-1m@@m%5RE2s4KQ%n>2@AtkOYrK8Vn^!2d+}iZ{iw2EWYMCMT4|r_Q4LM zhu@>&tc`y22%AdH0EXx~_%t@xK{k<9omj1qIIWC&a*%`nc7`SQtEEiH{^zwNCM1k;Z2Cp80OI_7=`z*y3q>SA(`PVn*rv~93^siN(g?|+ z(Ng)8mZRY#qy=`f(Q3OJ`U0hUwdN4!qt$WAMw6&2i8rmOY;Kf{#p?@$TOq!1gt+qw zb{7(i@@flo4(=!fK(!{C%qN-kzh}E(8sv-w{*2iq%q=OfE#jCcYIF)|#HqkBM7P3n ziW+pfMaltp?8-@`KxN4yh4=wFKSA?wJ`~J_TV*Psp}8-B1TeOF&aHTB!FhhiqN|X9 z6(&#TCh0DxM;~;Z={;`nb!G#`n(GWUWrd7SWS!Byye)4T_{Db)Yq_~l4SrF`;=T{E zb{6+-U zP|Hn%zYe|Lc!pYUW?4(kg%HlwQgbc#{>WKOtB$c7Tl)`b=v53Ds-a`+;K6n+V?cHE zN2$9kF$0zsImh7wkTSQ3lRO4pVvfhUAWKXE`VLD>$cw$le%xnc;EGn*!~@>7_Ck}K z&rhDtT~q{4fV#^NKOn8Rhjy4HM- zNJJO~VKqczKrln{-Zr$^=Dml(ao@zY3-~v(($YiUvUi=nt&;8;}6)?IP_+7V5D0TP{P84`Y z_T8DT6ImawE4ERrXe`<5Cet<2`r_tzHjpn6X7!;QeAeKs*nvmFtgZMJm=#kGYNH{} ze?3ZNoq$1fmRA~_V(G9Ex`UBh&{Tjrkn~`0>K8#@t zvJez@r4*tLV;~!nz@UmcL2CYxgsM`3yQ z04cEsQU~P*Cqi##_n>ov5w{#*0GU-`tu0y;trW6b%K}{3uXYf?)en!P0j?@mJ?ff> z9Vl2MOxOKvtTib)yJb?I!@%U~FbLoZF$O2)e<6(kF8HCXR}vd`{Qz*Sqh_4LqBV1c zuu#`q(qgW8tca|Hau(kMc(cynYe0^y2L%)2`N`@`2maWYOKaSZC_}7+ophf7x%wb5 za3JUu%zCf}ZkbOaw<{zMwMz_HQEn&AQEub2bdDAscjuv85k@Y{$XUe8~b zl%nQRlS0nDKMtWduBLGpw#q?>--92uxHn_(h%D|qB^~WNTEj?LjFD>t0|z(qSeW@v z#^4Zc7a1IcD2u$z>GU0UI8yNYh+}u8Pu6BC6*z`;GAjze6SC=A%x4M<*j5;$BCZ7Y z#mTzwJ`yE)agm%D9dVX6j^~5gfW8s)$tZN0kIO|jk;Iko7u}^-AB$CdW}viFYubUS z4p@oXKCnT%NlwG@g9OA1!?ObGF>Zcn@5)fAxy!&VOK6|eWI6A^@;->Vy9}JQ9VTE= zcbCCLsrIt1>EhWAt#Yajy zD5bL~8lAvm9e5EeR;e`ytZ6D9q(y`!6`U@*G#yT(SQJYHU{CE(U<}@)WKOn)GC4n? 
z^y$}RTPFf3oK4>51MSk8^LSR@YPC+ZLYbibh=a9yGh?vT`2s7>ffFY{7zk0!h879K ziBsyY>TNX|NX+)+>=RGUv(ak1->f!68!afz&rNgfHlZt}GN@4s0u@TWoW~$$oBYsc zA|-UdNGNvdbv8*#RfJfz<||};F&peDMfri6IT{IhcQgGr+r)pfh$1o+FmUYEOV=xe zrgv@@ga&>lYy^@Y&Zo-lB^-+jhoi&*2OnoQ;aD7RfMb`AMW11|;mqx~IzNOAG2}{A zgWer%CE703J_ppURDbqLWcmM{JUAX3Vl@IK@cf+z&a}ALQLLU#|CDe@3D4xFQNkd2 zY%OC5{UZ(Oo(|DNEkr5mKP*IGz_J=~w@6|&(o0{}n%1MCvKl4vi0Cy3S&BgF{TgVK zI`o@t;O_5O4V=W9S~iy%7;9NpwXEgfyt>Nb)%EBd)ZXIPt5Ifx@504DSbeNt@$4O1(+1($ zxl;lLP8v}>n^OSTtjSZ4bjki5$geXnk?7M;22NLgm?tfMy`y#lTuRDnkKyLyBfyK+ zbdAGm-+5%8xNRK@i*T5G;B$(+;Y z(4jDC_=6;v^iE;Y!ezjz$)L%o$)tBAnDkDYN$=>qSYTb!Ir7Mn;KH!0IZ!oN&n|ob ze`aCa0`IJ_F+Kz^;e+}zjm0tggr?CFdjW}ri=P}CQ^}imq zR`tJ*kq)Q+>HerF&guRp9ZUIYFuTT1!a2SdZerSyp9TL8v!bpj*b&ac6noWY1+Id_rXtci_>aA(De zKe?Qz6BA28yNhoRpnKs7#@DL&_8;Q~Vl#DJoTUwOf!4W)5b`LH;#4tI292sbq~OVm z7-4YEnt~v`Oo;g1@Z)u&vydAP(9{}QZL4a56$4Gfc z;QbrOJK+5!x&G;GPV8%h5`$;xeI4O2Jb_4tLTP$+ITnT&F!1wfi@*rIuQi`0k_B}1A>$QM zTi~RHpmz@JI@+Vxf&8Gp2R*D5-AJfq|Y_ z?6(y`*hjO&J^P#(ujN(|+ZT+{3SaQ?5C=KF58<BZM>|1e$|?fNU0@7x3=KJ@FfY zSzGM;@2XqL`cFvqXNDfVAj52}pOb0s)#dAwJy@o2&_wYzJ-j!g4rj2^?~ffHDBZUB z_%!<@p0B=R z!r4Xml}_t9OUs)x9sub%JdK+<9u!i!1#R7iF|E81;nZ%k&$ccYg?pfU*pbc0q zQkzHf6*)(fsAlEh@3?bu%Xi@h)UPuCD=YwmnjB%2@WQi@BOR>Pn$&7xOXE4T3$`;l zlI41>YPIu9s}~a0N>YVYtsYkE#bU@`espDcJqQK;h^bc=+wCOB#3G6cqp}{YY&6cs z#yI(wI!6agmXjCs=BQDtj`>!zW^4mGCBOrJabru<=@221HP8Mdq=df-CLM{s9+Bx; zQv>N)jV2}rNZ+cr(EXcT$09_LJ?ZfYOemglxdWu%l8h^)zbi@l$RmyPtJ`61hc^`M z7}P^TeqQm3o`v-uTp#6mc+t~`G7H|aW&tU>y}<1=1`4O?c!Ml~a-)kz5vIWcKnrWr zItXo4+b>KmSJW3&l0T!!$-+_DDmMuS_EqPWKpRQMQSa_x(0C6_8qYhp#?`w~6rStd z8o!(vihSKCx)%&x^b;)6^JN(K1aMMsb0#uj!6l(61V3T4I?g249;I%L?6%z493-y| zRlh_du8;s$gBlE!@cly2(hI3{r}*vWbbh-to!@ql--2oO=eHd;zg>jG-K$6HS)9K4 zGI87>vvg+rM1rVnX8Zd@h|O$CW{TOqxcGwU(fpTmx=kDiX1z2dza5Icz;8F${Fd$K zV8CBxAisSnL0=m{UrKo$G+qQdm<$h)fU!YwT<2c+V?aIQET81eAg?Via%Par=6;3n zd?6A1==`=A<{ZyJ`~c;*dFW{RuRu_b9&`7b;OJ!JcSXp0H__-1@e@0 zp@BsL(tr@0P*#$A@8HO6@~I=gz(88m65XGHY)Gb%v3$;#a@3b%ATpL4u=DL83`DM* zdw; 
zZuu9&^YAyXHXF_F8ePA^-G!&ui_K|Eb4Q~*dvTtCwHX=-Sg@wzbh@&>@NNyIWcIeG zoO@eT#tj=JU)ZaV>B!m<^|_g4T_Yf~JX;J!TrQGZYkgXFj+VVnZB;PS93s0!UiFgK zttu~{*~+EaUXkGe)>^i7>Szc@oEy!1F*nTCKsW|802+d$-(L|jw_H2z75WEqEL0~} zEzRhH@17I9b^_1W5zrGI``{@s(qQ^nmufA8MY8LHknwg`7EPFty(M1yH3D2QU?mzQ zYP<#exDW5G+ELb`e8UdBOYw&LpzxPYy?HW!Ef@uKPjrZ)Z`y%Bvj6d{)xi1Z zK8!yX-h)5s+aa&;T|Ss`m-u-Q{5ezbX9-H6;JXaJHvYT^&rkyse*%Uo{E3T-zX<$E zRC;9r(5URK72oE>7ioM1a<=qO+r(`QlL|W;h-O&)2`9Uc1bfSHWmIq?7a(oRn?jIpO63nf+wjt z3YJQ$sf~szify5xHb)aZM@}?U)KF1kk9DxMGguNuM2rn|$ZQVPxpcJc%-CsX#uaQexMYw?w=#zOIeJ zBkkLnq-6+H-z4RxtWf~WwpI>pk5+?{=40^GON=XdW}oeee9-npuCqOnocY|iJ*_i6 z;;ZIA1czDD6?0S?lDp$uVmH|mdtjkPu~6qEi?^ADI;=kaKJ8PPrXR6*yKRH*QH}CY zdlZYKRuPK-*BA|a6n};>N*L(-H%2j^n2^JpqNihu$|pWEZHTg3$?OvKH(^9+3=wdm zM~isluTS?L_F_Qz&kY=%W0UHBK>LC5`W^;pZFPThxVm#>qTfrS&K_lgnMx@aUl!40 zeg0$mPGFDT%pJ^7S)(#VBiW`XkvXPOPPX`h_tYmj5nTQlXMIk^`pou^p)@>_whx^B zDbzXpQ-%R5&lG2RR%T33RT*M>HdRa|ZX>We%gy(YHag?&g|GSJkz;!nzUIc)_+#1v zMMP@A);#T-p;$L}yEAea;|8-prM{?A_5m;pv>Z{m)mfL~rxSb&#WzaJoKYHUpN*oF zW0X#gwaz;6m{~c`W$e$u>9O0lKdT9g&f%|nhvlzBN$tKgVVO+| zsr`HUPC#l}RS})Vs~^5GE|_OJqZB)4JBF!GY*T#y$9vl^hNmX*CTsLK5#P*=zXShY zjy*~>^i|Q*FeWKo@oiGrZr>2Qv`TgD-Dw57{o`bYsMwF>l7HDGJ%f>2`*CN5(ky%~{J|@A6PP z3p9*<7>{6HbUF92NQX6Q%O=>x_}evlqb=Ohb#OYz%T z^0W8a8#C6a{6{iT&tQPX52_B#NtShHOw>u*ul1h%WM`IrifL-*VYYpGFxx(jv~4WG z6pge#i(vx0W`2@u%B*e zY>qygF-I?E@amIiV~xHt$e#Vb_9*(#;o755FDq#|g0aY$qhp9H@vYHTNRq=2o?ljm2-1*RJ_@vSFJQ5lMS z_kEb6%&{SxIv3_HNe{9|Ia^M7Q=9k%-ikF9cT!-*6AeDHXhuqQXuZd_5!OL%S#;@b zn2a^6`LVT4S&{R#?rAHWW?|0cDq~?bab;P1_3Px<&G ztp}@`FXi`B&Bu}?_Zz|&XWO2_(xi?MJcermF8={5P~9ZSdwC7v^e9e`vYqsSbw7TIWOFFsH9gtT_q!{A z64P2A4fd+uq6|PpP&QJF2yjdD%1Xb^+iTz!c^zL+1rvBK6YUXtLlNDd_Mc!*yC;7f z??3sd|D-%PD(_LA5KUP^?$Ua1;Uwp0j`ob9BYByK=vez_V(q7Cp}B@n>!XXL@>JGW z?+vYCVkq%XVd$I0uCgZbSD$+RMX{?dS!```%6irxy6Te`6~(Kc&?4~8P}|djGg;gd zB^RtM4M(eYgxY$!TVK6HSz8|qwLPFZFwpW?^1Ml*%8%Vaqof0({ZF(|jNab%Y(c&E z5MJ%Jy#)lda>`+>>XA6wP1V-G-xw-gFn%9l7NbI?^Y@Y1@-MglA+%!Mm7&Eu^C%#U 
z-rbJ*&YIYpJIJHi#fH)C)S8cbx5FxN(b&Z@Qth~Xq0%Yi-OKN850y?%zoP0vwZd^M zCE~{JTJbH(U3?buQzSZK)eqiOr~W02*VD&(Z=?0GCQlI{#8Q{?;SOvI`p^-zb!KGc z{0?kRWgRWGW{YTK1^IFb_6N)N^IzocM3G1reNwk5L1YASX)f0$ufjd~SZM79g&>R` zEs7W)MG*B8uGO1Bf%o`bD?v#XH*gYY&qZ>vc|cw5ZB^o;F&wLv^4It zu8OIs_2cFw3y%hyB@4zDMIMm%L(+LVI4>mpw-XDE!p4V(MI)3OR#Ar zN%}Esojdu`!>Y_e>AZs6HCU%94O)ZU$ik-L<92B+Q{B_Lt1ad?*_TU%&8QY%tvkgt zKS)m=@_SM?3CJ>w!6SDhwXTEEiMH-S>rF*yd>3n~<5=R*a){tTF^w0niYhlQ8KEBx ztvTD2%VsT}Lf^FvwJ5?=Wf2)Pg~BhLQxxEj?{9?h^Afdp$a=M5eo_e4@Mj}*Z|EFw z$y#To?~kYZ9xQ=wXCYfiES>?5cIFJS2DtdeSYi_`Y_X+Q{6Myrt_e9kR?w@louchz zrBhzikd?ilaTMLz2-Bh55w;Em<6HJ@cF#7Zd&U>ss+r%jVTM=K@xlRNv-kxYT7tjh zFS350S?NDiA9y7g+Ii0&c%Qw{dYFyw`Xb9W77X$B$m+6jXzSt;HqfQT&qmp)>h7eI zC#`WwNbYuyxZei8ynqJ{j5eIc6U+bU?P&5Wagd`qcM$}aoS>g$xUn3e3rk{gJKFx) z*Q246U?9IXa-l{p^Ek~`RpZq!gw`Gvn6I4G3i}|#UBFLpBv^nbY(ARkCu}|Lb!4s2 z#1kL&-F^&bKxk=x@<{f8XrXd5m+aFK_9wR_pU~ykCYo}_~!c>kG&CMHI zWXKS86?coejGt`Ob>m=uvRl-)N#u`Q%TQau=yuaFKK7JI1FBgtD&Q%H>?1ty)llW0 zpm&3K$|O1hPYG3Sqi>DHft3%6-d|^S`<`Q9>A^3`55?>kR^A!?U!dK3K zuPCBM+(a_M@fV-3IJuYMDq724pTkfdig;Uu(Jo&&E?6I66t2>naR*!1cexc-SThaT z+|^L)EG0JyY%LP|IhuHccvY7FD@yhj6_-HL#gE1k+xU1cdx`vy7@W^v%3wdGWsj03 zr?Ko2Wzr*xhY{UaM}p0xz+@(j#2(g#&M`4MZKts8SYMxv^DP;lO2A?IoKMBn3!Gb< zwD?BwL`8`8Jhpc@{=zV`P)tk3+eCn~w`STsAdk+rfv|evEbhE;Ez5ODIauk+ZLSTPtWs$G?+ICD!D?WfD~=!6GghO{ z;3^fa)IC-qq-=}*NDr6ZFB3qq8#DQfZtpaH3QpUPCtY|lXj{_3&kgrr44kDX;qNVm zf%5lA^6loKi?tL;9n6{h-s%32e{ zz6SLH+h?i1qhMAu-`pZ0W{NaWXTd=8X0UxZZLoDd4!V-zW5`is2HRC!W3Y{yXq~=U z77mTZYWQ)6+x@M&#WykBFx6xr5i_q3^L1<`-Hk0CnC;&wiDgW;>jTq`_o(K-=YPY0 zwO!52bZes1cz890_fRy`&5dqgy5*@xV7l$&wQSQ33BVU{lv<}6oZU9^9knBArzDA9w2|?6$ztYYyzT8~rEKgB>Yf3Z5kVCzup&bHnF(Qtw?< z5>2my)_d3b=3A`&v(fgc3NV#(G}=y7unT_D9=~$4*^UAe$|k zoYI5M)*VkS#Ae$in{8L9O}pv{<>iTNHnY;O+1Bf>f_u$oyU*}|<%3}dC;r&)9}}X@ zX6vMwWwViL&zWs8@6l-0qsa4k)wWyy-?P`8GUF_^3{HL9Ev8P&T&;SPe$&ywzI686 zlwA8w>#(rh5$w00o8!PV5uGROrc%k(M)9c4PFre7o;yk@{Lz>Jz6X&rH#ol22xm|1y!LrcJvEwcf+^ 
z?PF!ynMoI#zfK~f2Pz+1-(Fw_-A7FwTOfkn7Og^!j_XHz^rb*%ugBDC{lJ!xu~wr#p$H`IuTi;b(J6 zUG+X&U06TBr+aL-D1xv{k+#6K1(r-?vShWCwAu-#o6V(aAb0~)V2c@hX;nf-0>&P} ztr=sFm$U4YjcJV{kkztN2-)(+@Y1d`Pb)5IeI|6~bd(6jW4=CI8cSPwWs}uCVtq3R zp;J42qq8g9Mxqa7PRGSLJFqs_vycu2~I-3FV0S>dMmxq!VejqcE#UgM4cxZ6y4i*=SNq(|{Y^Di*>P&J|A zsj~NSnG1I(IPPMcsR`y{b{}Wd5bh77YZRg9@Rw=6xySBi;y;Yfbm(&;s%X{jcoo4P zZ!(>y=2=f_J#=Vu5iqS7E%PRz{69Q^dhR=oxQ)MvrK`+z(t3Y-=mzr_MI?E}gsuAy zBlF0}SyYh2WZcTU2+pO=9>QeIV`EIl*bD9xfNdCty^LV{L3y(M0)NB%^d~8Cn?yY? zVg|k$K#^+$n)$cICP9IEMW-^z33!dnr}Qih%%@)0DGUS0ZwlTIzfnhghO>TH-x5wG z<2AXyrDKNjnlzhn1gOj`4%`gmF=_~SPT*ZKW3W5T!2@WVF=%ft2eGy%!eOw?!P8(QwXSgR-qW?{M)A>H_gUrFeQ z`_ysB(4#^j|H~c~yD{&Rj6*Za^hI=J=TKq+w%g3IT><0DGRs`V_YkwJS7zD99LumS z<#-S_r7S}_g=rXSoVG1!HFQd!EW<5BEyF&#Fkq(arl7J}w6Y904r3W^mxXGU;l_+* zSUbM2cJ(x@JN)@F5gUQgaURyjbu_zg9zI7mOJLP!4K@#D48C|R_L-}_*S8mC9loBi z4xcmY&~BJ@cpI2x?e4XdFdtZl=djIK*5STb+sS4fo{j3X73)xEE7Bkq;Os5Dm$nB_ zwyVH$Eaxf}9?!3=K&kYy4j1t^$VVa*L)L~_hcYV6I+Q^n>u@nQK;55d9TI5To!O%~ zIDZgFAzFG?3o$k2-#_ed#uIVxKkuLi^`!5hyXs7x_m=jeE{0ZFhzp&_E&-AOV(byb zM>ywGMbwdc8EN7k%B0G+&ykD{D%&ncq6<`>Pt=9wbluKNsqoJ?2^((h=`>q=0IC1N z&EU|)155_9wg1Lyx-==zakGwX0az>2aZQTUw;z#-BII=Qc>7Vr5?Ve0Ef7W9-eT9` zJ(OH@CpODPI{l9|r{bP?^&WrI;VLR%(Xc1+Joq{L4GHd8ve@D}W7P+jTp9JWiIx)p z*JW)ttYw^X_ZzZ3FxYaK*@`#J*26cmjsbN04(%8~V9UUQkr1u!zUicx_XwuN`e@Z7 zoEE|RRb^VttI;lL$p1P3pMl;Hgf#hPTpo+3R{Qiz%rHM zvl3(R(L5Q`&XI*C+v4l67TpfAtptHeZC2hAbd!?HrOa-}aEKvI9@N(tj5GO%iqJT3 zJU6X{jl9vz{~ferTfQDqJNkgNgH}9moLbsoTuOekPX6O0YRnhI9DnH7QDny#fj`;m z3Mvvr^DSY4NNm z2Nv^brE(}K&X2cGKCgXBzR+a(p}Lpz3(8b@qesR+6aN(o*kY;h*LgHhWwjpXUd9`@nX3%<{Vi9i@SXOS_){gE z2^I=l;ul85FD!^(g*M&%;4PptLJYo3vUW)pdmJ|Yp0i?&J>H|CP5h_$Gdlvs6iZa} z-jOE_o~XEc#}EAt-;)BSXzZ}b*kL*4dBi-69g0n|G2hss!XmD4Z1QsX&#rcC@+z@O z&PBv)60pbH0`}O-&44{#7To-r)pYDJ#?3m;qsNzZCw-6I4>9xa)f7|{`89pV&2>bb zb{w!)Oq!4X5rD=Yf1zi#D|-a~sIcWB{IMm?AHNpx$M@KeJtBWp@V<+B*KTZW=ox`U z68@eB3s~K}z3H^UOfvAlFclrM97jz!XD+j}q4j>RSzct^?x|SyL!q@hC@XO1fbheb 
zmxkTnzoyA}0w~O4pGP2Z>f1wcvQUE5wNtGjc=e<8;@IMoS?f=pu}fwHC)REuuy&mS zYkSQf(P8PUI&nx;O7YV^Puqt(EEiR*NQR4!H!dn=J;OyeYZ~(Z5L^^K=(AjqgvG@F zhyRao(W9U`A<*Uv+oq9ALUn*=ul8TWJUCgo3_e-5N9d^iv(OQNVHp>NIdRs@x?r?2 zG-7;J>(LsyqcTD{%~@mZV;H~~iP~YY8u54{ehaakPlnbeHU4t!ornIH%~@Yg#{+t! z=)6QzZ+-QqKW{wCW|3%aShjoOdF|7T?aJj^-M##8)kP7iz9~7OS?(co^nD>P1U(T{ z*=s?Sodbn%!1ZFoMoYO&_=YaQ1{3}U1!KZjQ7|UlN5N;p7phizN#q=^GE8_DSE=yv z{PG#tmsD4=OsHtdUVd)aj;s8v4u0lniNjBXLMhRzMGWJ|Lv}?CTF*dP4bJV3dq22? z)`RFX3OX zWH?x2(0-Pgv#XjfpY1};#tIwBaE;~DA=K#wp! zh|8naXt`>x1`xZdBtupy1|A_1eWYJfHn|rt!NUaOGnWZ)Lj0FzF&)x2#TFj!>l`GUrYT@-eZ2Z9KUnM`H9aI)Oxxo9Wr+Szn<$g@hUK)y ze4eBru?CgV>u-;HyDU^}Yj&s@`nIh(Z2dEr96As<=@2m#)b|Jx>rjZ8s%3?UW$;H^ zvIQc>jm#FP*FN#Bw`hFdhb;bAne0&RYctyk75`@X-Z(2VM# zK#WS!83C4BHI?}-wq3*DdX83DyVY*2aptU64qUuEFMcvYDmDHdTjxpREny@r5X@gK zaJ?YB*%#?*s(*Bo=urjor z#Wd%}Zlwv}(JnE#GV-Y7nJN9E1Yya7QZprR=8G{(vZ+pONfDPa6)fnYxRjfvwWX4Q zlANRT<~*yTS8Tn#5PzYaom|SxD8;0Sz(x?e32@{nIKWh9zydtSWD?Mb04UtUHdF25 zu~c*G(3fTD5ah-BIR`-$^di92(K57B5G;{qYc#-BM%YHBTtdZ9^NRXsH4Am+Ai~mk z8XK#$@M#_jp4vSJ)Yv@TU5L@OP*BU>Q2v8D3B;_S6YNLH?IJPlkDr81l*?hp@!Z7K ztKW7bsYAku0gii5VSqf&K-vfrhRBLt5Ao|}e+0wVZNq1%#pscZ;SV247on5)Iec2D z@dr+dk_N8Pt(jzo&%_??^+ z8W;>cMuVBs2=a-Bn8pwo>UGFFHx~BstdzUdXl4kgas@-ciSS#wweT5klveOg$>|!q z)Ayvpui>UJN!&-lOrJdzR9RtQ52nJ6ztUuJ9CLv51K0<@(K~6m{1dJ+p$b3XDi!wl zg{Aup-&b(uGdGFANSlPfrjg{JVp3{<N?Nl2)SJTK=e6G<*vZ;|AZ z*-Oh~6dx>!L;+@qWz6P zici+fc1t0|nS)85KnWnojmb89}NyJAY=19a#HE*(+!0UYB-b-6fO0vF+jOS8fax>bHR>QTh zKUspGHgFnh8PDqI6=_V3zT{B7QUpJaFA4th;dmyR-Ec|jF9J=OX&|XDYO6bb;6$OOMEd#`i3Nc!u){_h#Idc!>wOmFxP z3ez zTj-#oNq^H8W^x+>eGhE`?cuJd(;AS-z7yb(`amw7Rx5NMH1&aznl)$dtWbMMTDeDlQ`=t(MbxQ75m9h3hG%g zw)HEmZ{`VRTT`#~U8j-c^#%3HF*(iF^bj=>ub*`a?KmI+ZKeUDP4DHPeIt6!KG0W- z->v$M&{{oIo^~;&AlRQUkYC#rtWRtv>qjg>z8|!&Sb=H%#T^QZ5<6vb$9EiX=8$%} z1ZgK}Do@$^x?J?MR??;8pp2Z-%UX5^FT)(V^nP)WYb|(pl-@11HvKpNrK}ufOI1ou zxidTS$NrAO(8}9+!+5t_uMd4oDxhQLZu`x3xPot-OI}J=jG+NCrWI@<4G1Yf*sq)z zoR9Cl^M8OGVNe-BN;zCdq3P*_l&{u#9R0Y(!xXhllrb;1hAScAiYbxsT9I` 
zA9`Jv!S9CUuh#m~w%+TCc&un{FkDmZVS3diUy$&ND=2bWSw6e)Bh$;7dN@c}s zT{~@&I->6-mtRo1I?qv1H9WDer&&r4Q#`3>Nw@((S=mA2J zLf-baXQ!Uox;&IBUsBNa>|5#5yJxn}4%xkTDyHu53N?@0-&N8wrnr1y)Qp|03qv}5 z!_we|6#YA$1MYRKqBzrvJ0K7yAB1o>Mg8vS za>HG4XL{8mcguYySZBn1aKVanFU?E;k%GYx*@lh-rvIS$NdL)BKdx0}OF!;*WjO7} z^c-`K%$0Lx0jE5ME`Ea26(rIPRod~vmzRTtf`gpnx6=4{DttC@NfD|E6ruBqL@KqW z2`#NeborXlQZ%8ZXhKT|YeJ?VnkMwXe?V1Pn$WYT7T&8mY&D|`eTO=MF4V>IVRRuW zrcM`9DXj~AN}l7K`S3yXp=0g4um$C09YG&DjXM1ebZR7hD5~l1^dSX{ND^lBA&s0) z0mp+rvo1Lwz33pDuhEO-P?oyLsk{;u(}Veus5IDo?Wj9Yi*`{}YLWDE(}Kj}YxxqV z?s7!y%;w1i>gv;+j^vV+?X+ZN4_UG@#mXp()Tu~24^u@#19xmsJJN`qe=ikjm#-oz z=pbS$(p{7?LWeAA-wkfKUB01jiJ#V!!8?xNF*S);C=)zVlRjX+C@CtI>qz2#YA(O> z^`(q_m;BM7)sw&*rD-F|9yz06vPhp42D#IgG(6ckyd{CqlL~j5p0rbX5&}jN=n^E5 z7|ry^uZY$_&0S;7`zn(pzJ^o@cu2feMv|Oh{5IP7ZMN~#=H=dqrw#G1ncUvguMGPc z6e73Nf;q+R5=}bfQ)A!czW7RuuoaFNdNKyKduix)*`jVs9E+y#R;miNH)FlhW z(?LuAUVf!?p9(S+%kEOn`1}eEW#ro{@% zhD+}>g-tQFwscxrMhFvX)W?4is8Js$KI#4GPjCJIL4V@V)Ifjw!$|s54XQ)!PK|1y zKfQU!Ncz(qJTd+0Unn^JNeT}59bSKeWG+!|-NfpVHK}NBKHUo=^Vkn78_G7qN(-^Y zN(&|NPjWH+Ny<~8FJ-xoVl%T8rbW1poX+I*C22(E6t6}vYQF-nh7uFir1!Z#hRw<{ z)6N%sC~xMPeXhGkn}_v!3lO==obR`+wxJJ ztPXMOWCT~rUv409VuS3CauVuvbA!yP z?-ImhS$SUApoQ^7r*iYaOy>87O3&SkdwOGVu`gELvEp76vQ$Iwzp0XVkxtW(XseTU zi&C9(X!F>uo0Oi(X~X3A*2WQuj#>{vwE%~dv%|r5AK(D0C@5b2XlQLeg;;ViTTPyq zzzghaInpQBdVA>0U$WJ`$K2+0vRigmek?Jq0=!8tL&}8KJ}zYONs0U@243hVeA5!# zMt%d@IS-TdbLPVi4B5tcod==?+Lf|k9n*`YbZG(3gESVufcNMC`#icM9$^B`)5I>o zhh+y(>4f#W0BLiO?L>6>*>Ox&8=QS+l(NrYSZm)`XMMq=@>=Bk8!Pi;DFR!}m|CN4 z>bM;QO)Yaa{=%Q*gHlGC|XOEzmp$G}^=E zLzDK-xqU|LYgF%)mnV3K0P)9Mo=VB*WFyml)Q!@ z>{%w_n9uU5NPp0NBByC+Fv%p z2y0>uk}X+nBZ^nkc^2kDHjyfNAZ@M0($~V}SS_<>BbDakarh z*7xChJrMEGjvYtgc`RYJg&M5bA$@?u4|w@)cdiUxNm^y3?Ez=RFw_f3;bT!WrL)Nk zTigxlmYSM_6BFkV`Tudp1_X+%nz>Pz@JG;X20z=kcClS(I^OXbtPsV-xRG?+Puy0F zH`pQrr*&MH!aNr_A!$P=GixDpR`U=`n_sMKei0awU<iAxOXs-Oaz?MCVG{cn0 zA!<|X@vM*uxV|5JUP&=?+L(-ci;Lt?UrCm^5@cjvQhC0F))f{5d9{m7ze>IWlIZZu z%HJS4Rtmerf1Am%KRP*fjY%mdu}n(o3)eU~c8y6lC+RLXIrh`l!kzED 
z^yQdCZ?7<01AID1{2po%(M@pum7%g!-;0y<5?WRetI(7s#1s%Al&8W?q#1TGi4tbY z>k%cdhvH?ljhR9 z;pACVS?mr&o`vRbON%pCHfeUB+yiFQIDz&_wBhy6&>AYwv{LHJGcD!G8n|E5MC;XJ z(pWpdDOzF`xHfrJDq8(cXziO^$C68nl=Q7U?mg%H31}n7vYkjguQTqg3)r64-pAA@ z$|`gUxG%<7&acsWetq?$=_TFmwxqkkBHK4+w8v!c-ek01!zg z#vYa!>twSo_ZQfGw&}_mnL-M$=yVA=sMG@{pQRaCE^CCE!Mw4oUxZz5eLw@RscBY%s@*+ zfQn! zeBXG2d?5r56hD#?cG-nAGF$X7qBbopk$3wn8D?`<^J8&>Ch8>c5HS3jXdy;$x*2ZS z8?caf(e_3>#*53N?F}WF-KlNAZ;E<9y@Lb1(hm|O#!lAw(6q>{<80)n<0W3WIO2aVEY;Q;<_5}3fy%|B3gsC1mpF!OgU!VL*bTK-=| z8GrxnD8~teezHY67cFh2g`t}##SJk;%G|EQF4o<8LTeO~q0tu+3sfTm62TL;Fde>rFc9HNKJuemhBHa@uHN=zXQZermbf_g!k)HOM z%<0f3KyLXPOITvb4J^s%SJ0c$n!goV3A0)SdE%!c|9Y=q^J+VH>b0gaBDG1wiryx| zi%FkqSgoS3W|gZvKTUn9@Vd1k5be?v>AlKTf1e{gsOKJi9TGeq`WZxjL~i&i3DDnt z)N_$%5$`3-yYqYQ$Q96`zup_XU!NEgVRlr$5Ut*`qD7gQV&22vsCe~ye46VsQFYr* zB=68>Gs4SClpu!ek=g-9Ogcmb5wtnJ`asU+D!8?FZa}r; z81scaAP~AGLi?j^9y7ZAt`&$$*7{&8J%)6%$QIhYK-;4(xkCgDU5kJ()e7Or^xIHE zUT$sU7ws}Fn6>xc!Jm4a9lf}>-ouoFFY1UW@kJ^3KBdYEXjtvDNXiSeOBP$8Jnoe% zj#^4Ek^_;|F%Wu+_ zp>JF-)x2Guc45Dpa$QXgsOG)KzW!`~)3Ew^=ZgFABta`Tor0sOLthB&`{DdePCuvm zif1C;AL7-ogw{Refso8V$HzqZ;Y^LYhXlbKD5 zG-_753iGokGdmEub&c8a=CRgf-mC6|C5cX1?n2Oz`P}o@qH0@FXFk8iRevw@S(?*6 zz3*G_ngI83?asG}a3GCyCl8w6|7inb+xEJ1=l2(7dVdJ>yCY|QcYUDw{fY@{ff>^J ze0^dpC+$G)CC4$J)AqD1ztT}swyuofohJ6uU(fRL<3tvRvjgIHnkrT(pYU5 zKF-BPWsKHU{Bd+Uryi*9#gCh?{+J1Cmnr1@MD2eBQ~o%KYau~XUYS3fLmY^oUU5Y% zd9Jh+!;e&zL7f@|Z@v%j$JGnqT~DuwC-)fw$F63}IAPz+?@bL4+rWo&@GS-+-~7G_vTx6r(qJ<}!bO=Ht? 
zme@rvKYk<-C4VK$jHB`l(-+SWQ3t%p_~X|uMjgd+(F1O6myO4tR&F2B&GS_M|; zRXBmpaOO;HbVC+Zp5&XUa5q(gbf%lBg7YuhUNHg5_cLaz<9&e6z?j5ZNhAMIk$w5e5O}VYYUSM4sYh7C4zQ?zSdtdswh=&Ro z*e~8#&Z{lw1hDqn{JnTq_*>NH|J~>RW!Xah@D(I7n5X6oKS#7!-Kv+H!;!Xk@>{-y z-TgU5w<+2>e}L^fg`cy-bvc$R72fzwa3kbBLa%X1MceZ&nOHjrFXR@E%l`vU;ok}pv~}jt+GCkwwg$%v((>!QHr3IO1Lu(b zs={@qVmiH7>E-=j&a-ZqwdvDx( zDe65|$7wmIM%oVKH&@X33N@Y#zk!Bf-xy=xpwXG8I$zSgo}&zl21e@-fpT^rC9~!f z7N%3->u=UuHeE{WIK8a*qV3Ie({~!MH<8PwP8@00OlP{Bj;o>)3zBKV%vsFLSv;w> zy@@Ha*q<_wwbFii$~*$?LFgs5_!6-3J`+pU=i|msh5y^CIRaIJCWs}XC`zFRM`Hc|t=?6?&~ihBWLRju$Lm}e$>FHlj}@i5 z44Q~KCjSU#+w;~bb!84~ClCT1(s^3r6wXS2-1qlJ#&_5biFC)TgFt5;+Me#hz-iWA z=K8q!i7k0rV#p%;tv3l{5d9VkB6`Rt$`6hpsAn*-a^uBK!xdEE*^@^lOr_TIp8g98{~6rVQ~F(9)k zAaNLm28~MNP<*)ofbmp%b4!p;eVxD!1lP6oExB`fU)q0Wd27oNcJNg#mD>)anxBrg z-B6O>toI*t@Bi(u{r7d9`@VNpx!2qF%#^l+g)PxL%bQQSv*!NkeE+^EKcPATN{S~T zbSJg{wfa1$zjLB6#$lec9V{BTv0skd*e{1`j827z>C^zZrBO#<+2A7G2Sa4QZ<4pF z&uyPeiHD4SXBkLHfydE*pww;3L2WYf4zzt5H*&{LS2Y(%Getma{kWXOUHEIL7^>Wt z3hz~8%%CH{raZUh4B_-kFy>2_RLN*fPEEsatKJ=oUmrHm{Ik`E z2OI;E1D}rDyKAX+zwmy)-@5kuEuig-jv4-dCqssG7l;=PlX1t!EzKvJjAz{-rUwU}J4TewjSEX(z!-e1be#o{EG6TA+33=<$V9o`V-@CT#6p9l2kD-s#{yQSGc9Y+lY^2@ps=evr+2cf^qsc+-e zsuo6^TNqn<8{{(1ELV+%X+WttnpgpkJpd5{XWO*R;n62_fR-AJcl`0!@={! 
zHw>P#Kzq`47CcQpJP&dEeZtd8RfA_Qg+s%$^st50Q2Jxf{=o6#e@oNaXodtQs!Pf6 z7%D-6d#=$RNbm&;!;|1J{NfoZ8+>{12_H#-4pYMz0^h(-0{Fz=P3vBkf$vj-C>ef% zO2C&|?87(agTR;X;B)e8iW(eoj)^569)v^S+w^1Lo0%oQKGAY~8qmotWR~+yj115- z1!gk*Q{DvV@d41K6ow4yFypCujHiQln%*83-dm&#mM&=+mZj*S{vBFb%*;QlZ~Ef* z{#U>+8vqUCDg#&3Um0AtQvOv<{ww9l@VbS|2RAdhro9U?%{#cB(Ht`F}3Am8pwua zN41m<9?`Xn4B~Tm2Z)<=C5Zom!Uzx#Gyg89b{gL0YU5DwPCq=BG6deL9|hw%UC_xhn?Vrx}aT0%=3cqxPEf3&9HdmrXVq&fjF*P>Y{;S*23I|vOA|&umzL6%}ZuJXf zoCK*{ta38k^aH<-y7oQLHEGAGeBZv)a!C_o|K}*)xPc`gre|VfjlR8`)l2mzWX@lA z31Sp@HPn|f^q0%0+0TDauB5FlU)Y$$(I|awwxk_)-0=RVef$!*$yQ+=Ybs%`zifN zhR^=K51FoW@H-4Wok|^}rv|+|EIp}zCQtMe)z6=62@a8$mwb=$I?+UjgK`+P;JPM* zs)4G&7}J%Ar-4EaaNu_s_~ug0;9IO04+-CNfp%C*-VpezzZ<}3<60A_J=u!hKQ1)* zrc)L8uF{p@TTJ17z;_MR48B|R;)w7aQWeM&QX?*O*|5*(;E$~vy}$LjY<#qw60C%^ zd_n`>$bh@$;EYYV<}yRY^Dxu_BCo=~rT|VZ)NSEp0foUh$zo5#jz6!s@mC`sE`Ayu zep>VNN!A5NSQX6l_)z$f!^g?f41TV@)bO+FAAS5Rpa6b8t=q!SYziL`Kd17FoBwL$ z!^Kb3@Uvv;h{(yo$FwxP94C4)18`}UP+7RZkTLT+J~BcSKt`o*3mKX#_3np< z1K4YWKPOn>cfN1Kg|yD>eSg6W$-zf$njStUdeHp9A^P49;Lm=^@N~=H`FQ#Z3g9VE zw}q#%6b9ib`2H~Tl27e4zNV;+4;5cES@=S!&%{XtUCjBLgRe8v_&Qzq$~C|j2a+gz zMopY=m^$HaeN4S`t6}PqMjum8QOLp6F!*_fIyRs3)YylI8_6S|pERnR*6T-{Twp2( zKan(kKAYw*jqhc%ymYals2iQgB-D2)KukZ=Z4uK06mn3MGd{!M>!-Y6_}Z=}K3sg& zIR4Whe5LJzKNnv{PYnUSM$-HrxXAEz!MA;UokRhAwduC-^*0ng5Wa5ah4lRUzMA>) za0Z%k=3j&Gb|}7;LtiZR`f+rZWzD}>%Q<3P4a;g;&Tt&enard0ko=!1+$_qL zfAb6nOV{`~IG+MIsMKxY;1mjja4;PF)N=6ay$=`u=??y;KMQ^{Aeyq_zrf(1>G|-7 zC_?Q`(#Z7@c%chXef6*}X zNV|`r|3v`|jnQpkXd;Ci3=KmcrPML{n69@zJltf>4`*WQMy|(dM6yZCn4I}T?4{3t zV&XqGf1=AyZ#m7eo%iddJJ4WAoV(gbVkHHT*s0q>VlRapB&K^atUa#x44-?|z=w-Z zf4(`BUifU%&-;>m4v0hH)99}zz-L3?^=x`CiFoJvhR@D#`S`qz0{FZ}w}sE86h^}5 z$o{cq=b3*jhADpw=I1c}u@N27ZDU8_iMF?#L%JVELSOJnm(iCrb2vpkWWbIb*;%Ii z;2WCDavj}%+nMhVd5)Ez<7d-IX`r%2Z6?NcSkGG4czHKF40ME6eOrLcGKaN`$=5{i z8OPUKzNU##v+RnNFRKt;c1g>86&jX(v8BPy5not01;Tof@8|wt!usBNdXWtO^-4yZ zqvx$V&Bt=*Lhdm1*Xp^3{zeM^&^u4N0V2u}*|m&Kt$un>+8I8TJEG1pwIb3zNvB7mF8mNA4?DIXO=O;Lubfb)ce3 
zdVix@OoqR8Gk*+zu8p{|->)VAS}l9KMm5jim2=%IUkhG2$*$R7lnQ@)&MTL?$t%XtM1xfoj2^Od^pPXiXSdx4~E-09J;rJohHS3RXbY7MpGTDHnhCVMhrFn z-?RWIGlIt5wKe|6xx@6!vdYEU4iq)d;_c1u?Mrz(V%zZy9p~9yYvX6bwV^G4>{rQ) zdu;GmE@)ah$+h%DS{itO4n3|8v2(grV<7b3PfKoxSTp0Xoox?&_ZD%BjA&#>YhnM~ z#cb2tmiprKfl1`Uv)bW8wMxx3s;ML1_G^=@+6=8;p_EZ=zcO-QjVtNh!R=HypEpC5 zYz@_s>v+)HbXzy@q(FCqjPtp*6JnDu2r0SsK#QP=-T@1y^(QrGqWc}mv*N%X&$i=h8$c~!2RIgWW` z_UANnoI>~?mZ~oh=I(RE7g~;BZ&}vn9BL$%`!()$H9pdFmXeC{R(cB$LwKrs1HD^i zhS086LkuN*PfO4@{jK#PB{Ozv_b3}=*}OK17G_(AHUi0&uF3CRt8eMXg;}6kGsYja ze{>T2uyqu5L!s)9{#^@yW{|sTWrcR}M!nnbRELw{Iog!ry~e?o(WH)V^IqxeZF!gN zYTKhp5)?%X-ek9C^HehG7E!yN+Hn$@cpQ9NGpX;NsoCFd4SgrrmZPBRr>|qOe=5Bj zCl&s;v-K72VGXVPj`*|fH%*2=MSE_)DoysCPjkq_iv)C)`jzOtG8S4vPo}_=Huat2 zKQHI`@%H?HdtUICUN5nJ_eKj2M%#WXLB%JxHJ?c3BdFZ>TC>3Fje6hGGis5sfqj*9 zWpjD|FS(^pPp5VswXNO0-|~v?&gAY#vOB~l_~5_zk861R(V>r9{l_AzilL7$_a8U# zSa?W7>l#1Pf85C9&kTKhJdXv90V{RIfIWfAr&77E(0}ZGOV1pz4$|NFkTy~E1gbhn zA6nudWixSj=FqSG!hgJ&$C06rzvDk%%HuB%eSDpJ{GC>HB^hoUTH`{$#%fpNvY|C* zP(wsn@TPZO2`b$Yocd_9Wb^kr6-G^(8VW&eInsB-j`{{05- zf7QR=#QneY@8foz_zlXD(1~Z=2Z4@m;o{0j`&oNK_l{mdSt%~g+RfuRy^+;tJ(~(o zGtk|kxlzEo;v1mz<_FdBEbYp-kIK=gqF%dAlc8ZE8MS#({K!y|yY@`s#oH(J4A;osM>TxdG4a(AQe(z@}as-WvI>wdk!)V}m$^hu8~gt8>#4BP+sf36zP0(6hVsh??W z2S-U&n|Qu^|IXljyVm=E!~6H=<(1|Q6!RxPZ(y{YnY_P?ZJ+zQYJL+1t_P;`6RLYD zS}>q(pI?jCdauPgUl?Us^y+HV)4e7ywootD`3mVH4@7C4=YP0sMhX~KoL$G^*BmlM zMk&e4F`;|w@&+bS9@R2B^vh8Lqr80NOk<>N_R3Vt=OXR1S8}kKa(#4-veXcyo2l+u z-W^R9L^IeQlM09Q_4a=k_WQrjPsDZw%V1~(59}BJf)F;bfoh;{!(#QdL){Y+Ptx60 zDRf^yrEj3UT3V5x>s6o__$qI1<4qf(t-6cMCkFtV-Ph!3tlS7|vkXOb%$VFK#nzXGSB^^o$2rE6iHgERkPRD+j zpuOKQ&7nx87dbu34)awVOJ{=Q6(19JPv+PP+r>V5xq5CY<2NXE$34=&-&+*Ax3gmJ z?7hunNb4F|4TBv$FJ13k)k}X-_9Hsa>3mm8&k+&rUN5!88D#?>r7K^iE6wvLoTI`l z3e!|jsD7o|Oone(SiLa6{S1G$kOByEv!8(g*A!kH?HATg@C*NTgkN~F)Gs8;vbapT zd)S-tilTIm53qPSy5PZRVt!>jF?U+ryK0gWi(rMdKD)Q|&8C)#HeDrTrQZp7t z!?ELyD z(uG#dpl}@?*)7(FB;^3FcTaHqG3Z*R=tEBlAWcTQrUEci4swYCLe~rpkJrG(omU(R_ z4rw2M=QE=bvqj)zUeTmT8^;sjiz|v{VRJa^B4* 
zun}4r1rb)*OxFxGtGGkvCr%|ukXUx3bmO)vDnsjreXu;plLI=^{BXe#vGSqOURYiFUC?+vJ{x4W*6 zBE4e&kKak9=6HphlV3|klHW+tY;j&2ld^5VrrBHh&1F;*P}fs)eY%7AZ}G=3lG-?A2!GeFgPaBZ zRuJG0&~nD5)ehq%lf$)y>#BV}m-lzc9a~ z8JN2r$;1*HnXBIG4g*C~`xo&gXls%~k5|NJYO8kCD+kl|(At;jK_)*U`1yD=wUN;+ z6?v~F%YXIuoBFN6{;4zsYOI@E$^I`IuGh(tM!b2V-R2_KiGgCiNEz*v``%MkkHvM$ z1>d_d=X)IT`OpDADC~{%>1muk`GWi8-8zISu`YQjzguA`08;oM+^2}M4fl8Dr$0Ao ze-|^uK{zmBe^+7iasCLB{)#UfH8{+>hA*Juwl|Af!nNM|_P?t19&W#GnzZu?Cov!> z0MJ&=e-g4Vw2*DtIAOe7S$y$M$V3j!{HZ`ai=`e`TNn4wzmsfN}S3? zW*QQcMHXCUiOJ9ruf~v&f~EXJE5ooGCGzrKjd+`l6I~49h*N*m*wGiDyhw5(*?um} zuS;H*A5Wf>Kk)geSNKZA+Y=>w+`YwP9XD?I_>~#@z=QTk5yY3`B#59w^GkK!)11Un zTm4pO<;VC+9p_gn;))Iw?ufy9&hv@`)`Q^2l8IFHhy{uo>R3uo_H%>{bne6 zEeLx{XPXT_=?y{WH%n)=`NW?{=@YmLT=Eq_rCzY(`-rbD&KhEcU7 zS@T}%ui_FYAE`~=kg7{ANu4c@6Y(|}(|Jh*CGkCB;3LLs{)*bhYc8V@v8+0-ojUIi zWO3%)wf^UTlZ0ijfHQ{%>+0UwK(poWvr2L5})uSv1$--QEjqh`U` z`niOu&~jp}%P6&=V(}cWY=AF|uR-YJOWJ1UwR}~jNh&eNk5E!BfA06|u#0#CoryC} zUWx2495b~!6vu(KxXycL;H*rmev4`xfX1sS(p=S!FQU}E#zCW3N*oGQM025?dh$CZ z9&sHGtrVkHr>|3|Upk+|SOPVo05K3T)dBJ~A{G0MOmaEE7z#pTsveGECKR>@58{;utnM z_Vwhm+7R{T!>X^Tj3%z17EfGWUYCsI!@4VC9@|tFR4l^GF#g%pQtIp6KEGagNw*FR zqxYE5S63_s%vtBjgNXWF9`zn&x|K2AUShiK46T%P8S(aWepbO_@DrbUBO-+3MF`Ot zo=;97G^V^>X(ChWXrZ_y0@?=H4TG;jH_0>HT@*YB4RDr0f!MfTx3j;HERJe)EPkuz zqJiQFSZ?_`zlALeBW*CoW9+xsSV&nN2New*;U7&p#vS_}^9t1UlA!CW8W=+==GW5w zcLdC;TL1=%jwZInIHLW0OkLEPOWfPWZ^+Tbgj+cbf(R@7dR4h5W2VXAdiWn+RP#2C=7a)>?I*?ir0Z{|Tq_5hZ2@MpJgHJ*P z6GM9I!J^R0f6|!AP^_?DaE49#sk&rCYT&rk)@a)RL-J}s!Qt#Xnm(y5vD79IFN!zA zo+B?D$-9$aH8h3I$2+V=y`zN?up!m?7{kYztBa56p9%6|&_9}P4En}}K7)>PO@c{H zgj#Q%_elFSmF-te>kEIJ+eeV1GK;@yb}W`|BRjk@m>5uRpv&m5NPQBg0xGAf=`-bV zX)-gOh@!=iD>ZRB+RX;0%%r&2Pj0!o_CU7iH#Cx)T z4%eCrIKY-yH1Z_0@&T}-qaWXmdDv+-HfL#UDl|6d+Mz_s>Y38<<;shf>K38n7h(of zMZL2@_j;6Q%l(-%1;&^r<8Ij$Fwu}R1)6LMe9;DyDZrq*;baOhoJaU<2&tu^gA{12 zaFn1^&sUwtDEI_@xq3WLJ-!speW2035y(3qtcj|0k}8dkq&B5EmYi(C94T8yR^EF% zhLHV{R2q09w&B7kxvdCsheemwS=A_@lF6z2To>QAyBsh+> 
zr-nv>T)5t~rCF6m#jg6~l%9IdYO?w9shBrD#%zi}{;{@~aO)J7)_T1aQ%ORp!H4hr zvm~`~V2e$nQ|y(O>=h>-RM#dJwH*sH-WgKE1Gm{+(|7cZtQR2CyzCdC9wblySH$vW zbck4S`whly|BpX5*oh-bw|D_K|Mj>kUc+sJ>n4URG8V55QlRanf;YO}qTSHSIfy)@ z#iN|r{HP}0^m>o%)b|$WT!bMD6tII&Z;(*Rt^p^kfqqb zER}IIU0f>*#c;kd(xnzoM!!K~PUdZzE&z@FPw|Ru(6+83k>*SuK@@p~&8H)__t2X? zk<^BIZ&!VyQ+Av*q*ggbRF5`f(D%#erz>%gSYwCU!)tcilLH zx=hZ2FVH3()$#Lw7lSXz^N{_8fzQ=fcZZVW7#aG?ci=lSc7iT|@^y%k<{!~e+cSkU zDrnKhdLpeAd8S^#6vvz9V7`sCbBErFvkHcR-D1P6exHjw0}{&k5Q4s>YyMam#tV6_ zbREAkI12h6U?-^Ge*;L^-&xfBi8`;dw*8{Yh_|!0{XB?Zt6Q^WI?vQ3UD*G%kBZGq z+>Duub^7)By23hb)-?o20GH;ztIV*5>}JkNE8kWyXew&sWM$t-E@pY%TC9 z@y&vkBGYBcoz004cX`FkRXbZI;v&TxKewWv^>lnPy)m7oEq_@ou{)M{6O*ci6;kXV z22-g=efu46&W}ZonCp;ZFGv261s86oV+KFx=W{Ba^m6vo`N_gf(l=wCafPT&G#~2QjP4;o;K`~|aI@-Lm z5TCGg)4?@NJH3z^#y2t}&Zjlvm-SyiIg4p%+Lvc?n3v0MFym$K`qL4wXrtu>nlxA8 zZ6z=FM+HB#=4tufk4S|d`y?ci!U^;AiheuEs+Sw9U@ONmW!hfFY55{!(U!_@MiF2Y zVR9wQ3!``z^Xeu=@ko%!0bwksy%u;H>l0t8ke9eA?!CkJLaYCdCt78gbxLUEH!0Vz z`rXnibYy3;xgtNE|9C82BD491W)?EeR+rG1y?8;9e``kOR*8SBJacQ3e`}&{kvcot z_DX)sU66Sy{N@StREsA)g!a4}TH69jv9jp3GB`iUQ+a3TEL|TsIp$qTTTe89LYO+5 zE)A$2z*p@Wyu4NQO&; zHSaHg=_26L0cD6TT3Yv=D#}Mk@ zP1dz*^u&W{X9uy|Z8z!oT|=Jw94+@x%>|k~8FdxSe||ZM$-QePMSIp^Bc^1-kz8CW zDI8g*HIZmEaUk0HVgc?!iR&8Zwt}t@s&b#O%c&i``m4#Y6ZM5^sc?l~qa%3oU%`{{ z{*!z3MA`azEv2;qEo88L2r~V-H5?%mG+*Y}5*^c|hT^PB(2sc40ipyPf3=VO@_3Nd zUU;mvb(h*w7kwqM9l-ib@n@Gxo<= z$Akeidaqb7>v&HGXj-2*h(c(nui9IWUy3u+s(R~_jSMYe1NAIP7U>K?0b8%566Vn? 
zM^x8W^+qXkj`T%EDzMxXuORpJT)QIT4GD0`N;l`{N`~;ee{=kyANE}0K#sv^b{~jl$f`<~WVE_B+S4IA zi-ykM&Krfzr`a3RXnb<)em&tIM99lAuQSS9)x9e?e{BPkbjQFaXk%b}Ry_$snkjOt zvZ1=|u4ZjTCH;bvmhD!r_ZJqFG#?d%?wdLwPGQ_Uaq)>U;zZ^M_qj-g*4tLlgs6h0Q6Rrpwa z===vGUaU}VL}D*@1Y^A|X{P~jootQTV>cmBjvi@G*3BICG zorV-^)}q(8^^6iEjT(GRv>lXVV={D&!T=!_yF+WWFr`-{QZ&u*q!?j`RvpI^yz@Fl z)>r|Exgj{JS()I!63qIjjwx}c)kR?dHu(vM3+8_@b9qDaDXH+mshao`boKF6c)+fT z;Rzd>KZ5+@GUldwef1kFdefg(hUx#Yx@&Acr@}w=uQZ^k@DJ^39a3*mU?FlAO4Yt2%(&4!E!JW*qH@Nv(SJg4qjmA_#n3(F_X{P#X z>q)>;WAF|Okg3<{uXh{#^epzPfLe{uJ_F0sKy?5EG%8YSh*oWkS8)Od1Qx>{V_7}U z9miKAMaD*j=*npFOGpXPBJ8TI1HKANi6tXeXcE{av{0>iz)o@h{FaFKM^4tP7sEZ1 zuh)BjbPP9mj$M8XXI6^e&dl)Js8@)_Jnw+xx1Tib`Ek5tv7Rmg#B?|S=j>KjjNdYu zcMdR>H>Ql;a?$_{dJ5c;Rx(v#vYbV;b0DObR_*lJY3+7_RawtLr;eK%n9Rcrt31=O zwCY&fw$u;?Nk1HeJP3n42!rGZ_-qF01}e=SYh7EP7kkA2YRzT%V$YD)hVaGhMwuC| z2qGS&vl*^9*RiW}C3E45aAC)zwi+*F!I9ur@u*GNJc`d`^Fr6@fRfUj@LbpQSwch( z|8s4V%--jJIc2z$HKo=oTyc@(doVy_ekjkAVSWdrDBt3HvFZb%wGSEJgR)X? zCg3u03&@g{FZ2G|1-Tc|)%jcNn?#DEzd#7jio&z02&QacX@P^P%~2F_a5}L-;UwqE&y2d9TE(US)mF zE1n@1d^&mPj?rv3{(M3zJjbqztMu?_@=$OYOFbyA5wHG{;~F^P;5f(WiS)V26!du! zlxStCL|`iyR_!gUL_sK4DJ}Wt2XCR|j;=~`YsF9c#-llFYoe?hW%zZ)jF;ny>&xr- zBYha|*-9IOSoXR45RdFbQeN+LY*04g%v89_9-I2I*ObgJ1zfV!e^MFu1FdW9$XUP0TxZdsWvCj;Gr0m`&%iME((~(_iq1=w0%MSUKjs$z`^Nb^;P2VY1%QimWgL!5==37h@^j>3$loX z>bezP@iAjeM@KUnv6QB+c=ZnE%DUJCf$WkyXR_5jl5^hRTBvX0_@0JVR3H3{33eDX@TX=*7{tcT6wlbxvUJi<$!XG9u*x^^5hE!Xm^^2=S3m z_jd7jl#ZN0jC>^Kb(x?cu5s2m9I|}=!zN^ov8QG3X{=x#pPQv`@;t5`Jna`3aVn5k z;M(r&D^%OfjDOh~);iGB`q6`|hohsj#-=|p)v8R+sS+zFJ0mu$@C;QVR)!Da1a0T* z&QY-~w6+~>$&O<)wODM#&0#n4;s}l$qQ*nr-O#Ec-imtfC?s6f@qb`a<8cx9 zbEQUS))P$5E&Mr;naz!Z{~Ncw<;re$bTh>9eW%p@}~nHgp#G({}If~K@! 
zL_9{XVyI{*@8CH{qnE1zssuqCuxj)uXVhK^4>4-tgopJ0*SGgK`Q~dvIhS*Pdc35+ z-L>}GYp=cbYwhQ^hubpu8gU@D{lFrv&)iy6Rmmpes|e9d#210o++D|Z;W?O!iS1Zk z*660;<1h`Hwx-P_-*g@8FoG7m5WFbvXG%oqpFkR&VwE>yB*Scjiygk^YI8}^>8 zUiGu;Rd1u~!#KbYusx`H+`@MmVecx?P+m~lp0C!ngnq;mzYQ~84c!EGAqG~SC-!g* z%h%oV{(w z^mNrL+gH89V-Snwae(F8&Ey(v6_v)KrcAT=D`%CFYvIp zv&O3Ra5RE77_rlmdO8HZb5B2ZYEQp%UhNv|l@~%9(mqymus+e#rEFtQU!(POD2JX7 zy@ef-U0ldcvN2cA%U^Tlg_boedB;Ic;^ZS&9zc4DzWr;hZ{rA+`yv{(56ePS5Sw)E z2fUKHbd%PlwRI(L|A~6!t6`RLSjF_SBB|RB-uhRqhI?25)3KGF{@SBCZ2K^W5w`E~ zZSyXEd)tMUx1rbcYmZJv3beqSnpHnX$Xi!knEw`s@EiiW=&%gm!zE*$$iPlu!toG% zyb0u(u_pKPEo6*F`;XR%-_lMj0LA(5HQEnq;?m+xMdIX_4}o#_w+dt8Bfpt*DGl_y-C+bvn@C+6I(KD>x$`pi9DD-5e`@0*M`>su z`Dj~j*I}u*AE&a{^H0w`K8B`&hF~NLi_PV4<7*@^#b{>Ym%mUARgXWWMPYl5gGFOq z9Ka544KARu;e87QCI;fiHTznF&9;qC-?lQb9>nha`hUZLixR;Ae%%w()d!ZcnX2CS zo*Ogl~kQ zRY5+lCUIIXckei%XT(YpWsK@U8GpZO)nnE3mOg`#lqVa{Y5g4iR8*b_{bkL@7>8jV z^bFK|2FHg;W#5-sGd?HCxibKbxMkb5LVtqq*z?inbvS83&#d5svnnq1nde-!`wg%d zJ}%@)QK50`7=5}Lr)00?eDgyU_({R-P4TVxF~z1A#TW&{_(JvFs|h@n&hxo#%qS_26wqTUWhRb9Y1G%zi0MYR)&8 zra$shYURe~BHO=>871ZpJ2aVMEPe$PHVTbgJlsaa4Wu7eAGjQ!H`nM?{vZ(~b^C$x zqU!xeajEz(d}lkK|2u;R<2%30qOtnps~0~;`+sS_@M)LHYVK|?6z5Z{&jzVah1BP} z*_=n#C(_8e{0x5fY{gwxp~q24Sub?B06(Lwt6*al7D9!#%k<=ra`N{H-%E$8_m?Sc z8P&KK!uHOA7OQ<~S1k^CKsZ_O5-!cJ%fphvh#eYOtnl z8m~F~8C-wB6@A#E%{F|VUNvu<^|i`*Igt$ffakpZ@u`}5<2CD%$P*uV;fjrKpVTL8 z8l0)$lktk(e1eOsu6J#(V0AzGf@d?>6XeOyRK9)kuI(E{SWO}f1JjE*3q6R2UQ<+} z?aAMrtsteEgIj|KcYCT1Zf}@;q~h(9xHEdxjvr%(L@mbpCvM-6N;H;p-y;Vf0Ubp! 
z_g{sDTO84cCG<;pmxNIXAC@pC;mZ=HrHc4VC0r+Ajf5Tv_er=~#?v(#mGlV-dnDAu z7o-XKOC&6j@FEFoCA?X}dnFu_@COnelTep?!jw+mJdw_KCHyA|e=p%15`I#`h=g?# zUM`_s!W9yxNjRM%EEIU@mlk~{M^^*tUmPoDhaNu9=dF*>HRpGyTxBkc!0nP2>gcqdnVvjIWG z#)uyq$v>`8{?i6SQ`sxIzucW0}|Gj5v$V<9JAu#Zj5v-qhSe6olW({>?k_ zh2-W+=4nr+eVAz-NwjK>uQQpZ@pUE992#GDGEL*_Nut$ie9>f@#y0|*OM!kN&V#5g zt~cQ?y5*KzT#d*Ed44bR5sEYwRaa{Af=RRwYx3HSv|7Q}kwg=GouExFLOSgt-QZtQ zmPn_}E&MU~UF;WSyACLozm2vuLtD?ZD$opdP|h^&m+fhZxcE!}f3_4Fvx6qY(^v&E z$vll=(RmyxbO$D&}kvae-Q18BVDEfX|NPo}@q zpR&k8i+1J(a~o{XNc5@P0xK0n=2L+yoeIj+s9;@g{F6A^LDkS@Sin35$L){*a&yus z$7Q7)@a3$_4C;K%Xs~y{?Uj5A#(}hTDUSI|(r8I}4lM~SisuIxHt75>fcLYw7V$ao zGt~HZrZ^PbW1HUyztuw4kR>>;A;m6b+2>mnvaBG|Wo#=X`Wf7lkik$%Q7FX$m~El# z@(jujr8$&*E9FDq{MvNNhtB!yY(Wt|pR(5>ydV{M{tgu=qm;&Rwfve**;t3ug=bhO19{0W?PpP*UAZe6|3=K6Wq&M8Ym|dO^b)RN{SarBS$Gw4_wg;vV2Xn zIpdWf9^7@tcm|r{%p&KwjjkEZrOId?smgj#lXzS~B-yBAVTXFhFiFQAQ{ zMYR6-8d8tik+-#Eb*&@E8lq{q`!xBm+Xlb#aT|SnGMCyW@~CBeA=1c48U;w>LRuU* zrm+ZVoPjh7k;Y#GP^***RnAec&VC%bF%kP<_evfDb?k$$J z!b&T;=hNcO46?OnlGT%?h5b*2=f5oS%DhH@QhdrFLVQAeLVQAeI$=r!Iou_aES$O?TaNl%LGgbh8u}GX(jP<_8~w`HmeAMGem;-(a{%qx zG#m04Kz>jAxn>D=Cql@R;O-ozNdG?cN~&vFuR5N?#M+*3gJOlMHrWG1yt zWYLGlvuWGt0@~b}OP99i(HhS}TC9|jHp58hngd;JwCp%^I=Y&A(lGzRHvhP;>xpxr zYpF)7vnn*5&~+8D|9QB=KMGyVewLGn!+9~tR_-jIJMoFbUYyjnc3bE}ovGx&+TDMcSy0-K3;Ft&VC7H#Uzq_yZjC6VQ%x>itO?b#^DA_|_R(1WT%MN=p< zn}uxXSJq>yM%%GmAqUfWo&q}a&Cp=aOn##*0im`+1GO{b;_EA*QW{W72*^gtgg zMIT#&KCo~qH!k$cf_~Z1?`-H-1pUw#%Fm&SVukjWD0Gzd;5Lc=$Nutj6`Im%=$Gu@ z1XuVc))&(%lRqgA!i{p3efhLc*3A{Do6Ar)Yf(2Vlma?)N*jR_{ZzTNkZv7MrO;>^ zHAmB_w%baVcg{zg42*G^h_m?AID7JF&xD11h}VpG*CO5)#Jd=A)+5eh|YL-|7S^aeL(Z4 zZc%78(~xHlH~N#}Ae_@m$c5jSzH-MRx?_4C?VGevXd)GPNu!3*bhnRT%2j#9*-?W zoqs^1{T}oaT3*l|L1(@h8lB=#l7;+m`ib=5H|nJ9%%h#%DYT_?9_qzH=Xp|TsjGmN zmgm#bZzN{iLDBE?sNpJwRyfe_|02rP=vP|usRg#=Bd{e7*pf}1sdQ0$8tOV7>zl>2 zJ(b~A@l)Gw=j(gk1ryd3pT2DnMS`3Ve~5N zgf#}j2w}`WL&rF0@*8C;by-w*VgXeh&8EuoEXvwxji<%5wWLGIOr^}m^jPZHykH8? 
z!FUZPam6GoD_AzmKucV%I}E5Ywr&iRUD!X9Q&>>-8D`G!IRZ-{a+`Zc?UeMH`y(rhdC zsciqMP&M47#?Sj>?}mnb4?{dzHVs0LbnLsOQO~y&dIGNEO*vLX(lyyex?; z3ONt5jb6_6bs~#Wk7iQEw-x#UT)QR%b4Y_f$%g07r`$(j=i~EZ>0_xuOT#<|j~_}7 z_6ji;Cq@_^Q>+;4v5ts6qR_fW6`Ek#+B`f5^B$WUmP3x8Z^ukH^0g^ELmrrxdzF z%HnXN-xx>9P80j9>!1~vC$|mANX4FC?8geF#}s->$}#%IUhI7A#bQ4edxN|eD|Esh z-!lsR8qR{fcb z;r|j`4_qAkvfl$Fs@Kk}+@Ww;Q*iwSrF*>odS|Qa{PMEkUKcs`R{On?`UDR-PI4Ua zw+v^k*54@Fd4Nv47v;)lG~+lU(Eol5S_fzqhPcmRpL`m$Ndw>8p!I+@+u!@=$f8u< zKSxIU!ry=OIr4alWA83`vRl8)dyAXCmRj%FTfeEz>vLV>^tHJ)YAw+pETCW2c!Rqs zQoq^h^IhKNZ;E&Wem%T=o?~zA-k`hw3b)@K@;2>o@1cE=E~D&n`i|A|xl=i0joo)Vzx7Ce6J}v$hs5Jq9Oa0}cKr15@lQDO>BRc|qH`YWV z-gb9uAjC2eZ7XP(`vT5LePwt{psmq|v*@R_9C4Igkx*Mx1aEe+u5Ce|yPmbIO2px$ z^jOoJ8nmj6L_*%iwur9Rvol&ioELiiN>Z*3;jDYNKjid=ot*G_Qa`O2Wv*6oDOQGB z>K)Ed*sbYM>33~&@7)s!xx#u`j!21@2STk*=(|)(y6R?ksM#0TBZQp*b$h_ohHO;2 zT%w$b4kK^7{oba4%dHiiF2g%h7gx9=o7$S2Q5p2GvHmZ2h9mUajMU8`H%|E15nZU) z?M}DTh4V>Ik}NJESrjxNHQ(%WJ45vG8NQv~CeP+TYtR{TZwdr_Sm|>*?pEmNMjfr! zQ;@Bv(&q~_(Kn^E9RX3t*sn6>ZkyNdLIG~!1Laa#O56%X2|Xv%fZCe8E^Y~W;mEvf zLK@tjxytG{d)!So68%EP+b*Kf5mTyqZn3uAD@w9{ryDsA^UVhXugvhZ-iW7i*XHVK zJzj8c#1rtZzo5+JLrMI^vNPh_>?XUox#G5V)}|sCTrj{zwRA{~Hdu z#v6*XIeokR7`D)Z9!}lu_c;A7pWB5d>ke|iyxi;K?nWO=MSqzy_yZ|u44b(l`Xi1| z0M|Rh^u^R#cc|6tN8RfrIxw#~EIonH63jK;#*j0l)mfN) z^LU{4Vl2PKy{pC>j#NP$eFEBUaW}TLw75gMmfN=O+_AN0{W?+RR;sVxTptd)o4n25 zrg~9$D6N&Cha;}~2wO6*zd2Cva)%?Kz}|WvgcDs#t?pJ_Z6Ug5^UmEnYOA+zU0ZG! zGP6ZoZ=k*@(Ao;icyl-i0}*Ma^9i!N5fn))T`+^!yXi8j54%0}&01yEb8{#pME5s) zTTsIFekZKjo_c4f0_pi3xHkA?t2v_!eTs{iBuzwYqAmwsUt(T_f=KHGdu+*;z?@+^->@--^{GyIunf4luR z`U89XYAeh*v%Oot)7KKfFyv`PoEBf8(Fyy4&PD#V)<$cJ4H54R$%J!l`??}eW4 zDqc$<67bZ+#It^;ctIpHM{;^fBHj8%_g*w+J)2UFC^us{nB>1xer8QTS5v#w24Q-F z%{#DeVP#u*#v|^yIekHovk_O{JkGFsi#rrheQtkC#KSu1ayVX7pv~t}{ecK_iqZ>) z5Jiv6ygIJJ7DSw0e^@PExvLn>A>bWU~a3P}!~SaJw-;p`Dao+vX29c~+~txb>7? 
zE1S=1^$KnkB3YeSDtI<=O{=2dHMO*|oN2?rWV^==D^V{}pG0Qmau4N&8pAZi+3H5Y zhf!e6ud3m;#&Ei`2I`gY~*SM!c0fGh{k4Tx%wNr>{N`bo(`KV-hoR z5`5^MZokXSnMlv)ZjQ_$%j@Oxkm+iCv&%~23wc{Sa|jgWCFKde*#)9(RTZ~PLwjc{ z4p|-|Jdw@Wiz6W?!8b>WdV23r97+8)iGL26BELeJBNH}(xhcnu^>X< zME2WoHyL)S7?+R@)>QR2tInj}t?P+luPw|A&^gj!6W%s64q|cFsy6P8xK-80)1|^$pfo{Ar;0E)uAn6$!rugW#-UlMr8P=Hmsx6l4i`a zVUBskR@=4zMdh5E7(ZB^W_AG4tqEpERB2dc2`(=$1GTv$#;4PSV|c)b04Dskah{NJ zLY}G{FA<)g&yvpj@!v1M_uJ2Z@BVYT{+$Rn7(%Q;cw-Q2512myZOcWNi@&)&N$5L= zy-I}t7v<-4?f-P)34P|!_a8m}>GF5F@b_ANr;~rW@b{YjfA{#OE1%Pazt{YqPX6h_ z|K8L8&(1@Wm*V(VjhEty1vh^LsD0g+SY}JJE&1kLjng}><7W{&EC1Z$N#7qcEUh)w zT9a1Wm;+0{uD5aBQlD7S%H&?al@#d=Df;28expFzZ)8^q?-eoiKo6cEI34fjV{6V?O zXV*9`3ZuKudd}1)ZilGy>vJxyWO;`CYA5jHO+QOZ#1*T*Y_PyCr3U#fi{Zis?-BF`1FIJGX0FIlEe9G1pa1j_=yN+ z9?f3k46ixyvW4Ii<0TGL-Ts+9Py}H_!=5moyd;sye7-BB;B&}Q@ty=ec}x1MxL@fo zk&?o7Pw>m{j!);OFP?_qV$oQ;*3bUVMMBOTh<$Bxysit9{c}W1kmK(%--!tSmx!!C zZJ^aFOm9CdQeQ9V=O6mSP3Ohfs|RTs7}|-fej!b3i<*HICo{7g$Y=&FA>Y)Z%neEE zonW7>t+7I8K9g86k5EPSjrkV&n8{sIIcY)!x8P&#gg>#~OdN7(#gZ5=gb!y7+>^}8 zu)eY%1XqZ6ZRW`carvIgtulNBpA2)q(8I^)dT&KfP(q>aoYhV7@0_MznB<+#J6GSG z%oY8i$|L~y(BD;mH$|fnQ-3pupWZL}`|0@vhtDR;f8?;x*CF2vdbjcaR(#Ut<8=Y? 
zm*_xz$AO!%sZDC_bfz1KF=du55JSH=^bR@y7k4{wXm6HoIERz^%=D)uF{kungGyRV z**{Mipr%$!yDR(6vBG3KEaT0RbD@w?ZjwQh`XwJxJOqY#E)!^OR`rESQodP8 zaw>E69~mtHqB4Jz=ZXBu2ck|U7*2yoeNDMY*iW=+~kWd z!K8aJ$qk4f1Db0eYA(C3RrD-@!m`a|L-iz1j=$WwhLoR)$5&m zlez9W>P-8V!E(?MbWX-7jmR$RwL-!g38flhpJ|q)ggis;Wjda-Mf({0g0NevgjRcU zzX(dcscym7AmM~;UwZl-lFu%~dpEw02OL;w#uS2GXZ`8DLqvZVL zbVnuMm<$g}Xf=%&k@piPYYOK(vOOS z{0135a*yDPo6;?md=42ta<|~Ko9fN3^U3g$Lo$6+eK;haO@=od6ns@Cc@2^;ewR#7 z!YZUxQ3s9&25@48*^#ZBc> zBKhnxe56D0O`F<5h0Z6#9k&U-sHtDpNxrFlQZEThO!@^SAIb3G#|3|hNnWSqi{C23 zNy1T6d+C;Zw!MO{t6jh`le`hhXP4n4djwyPDZeqv=aAvCn+4yLsXnG8-^j;AxLv{y zlYX{yM190>5qu32)|vP=NIsI`9Wp;vCjIIpUx(C-B%Cnm*Dm=++N6FGj+x|jOTJh{ z@Hr&(nD|C?{bYDYSn!XV@)47Kb{Rer5_~--doU&W95Ot1li-V*`nk1O+AA3z4+_4x zDP2|ab;xj%&~C~{h2-lA2sx^RHd8xokbE(};B!bwCV3r_k7Rg9tKjc8)klxyQ)PIM zPw_9WtCGoHCU&m5B1|xk1QLB^)=|gF?v{^9nwPgzcts z*&z8yhIe=bf0xNl)=559hWE4xz8+J(1tnjH3|A$znfhLf43!Ivk~10 z$8V2z0X_uR4g6mKExCBh6W<^+T$6|Q)j+ocegkftc>sIyM&>l|0YJ*flWEUmMFe;W zToiZ(UGIpDg=dJk3|C8>VH+Il&<e2k@yJUt7oD-!7~lGX{jiaEr9n(yc_U(eCs{RIvC#KxZ#6Db-;~pWg6en zd==q8z&AkmfrnuXZU8vLjMeyR4>-e%;GSYRfckrx3@72<0-fRJQoN7w0^afh)ZfNr zI1N`0I>SrxHAWTl0P62xGMt9Px70T z_uhmih<6?tJ^{A{xbZDXeq+*e7aZseZ-eUw-UX#y1i9ZNz{6 zG1>um7?#KI)gy3*HE@Kt0vVq3OVl-RhS$KA18)G#!<&U$fUAJd!qoydzFWxe8Rox; z_69n`9dH927f^qTkl|ORQRbjCd;u;C@9!Dk{4>4>XnZS>-w?bv6>mm>pP@Ys{S`RF zop4d$48H(33f%ZMpYh#3e!owDa6mAr_@r^xxdvACdY$x8CW9T{yWdq#!&fSCR%$s=pHr`77dRjYp7~Xph z+Ana1e}sDwIK%bDu+zYeZ_pXvr!&4?$8XxLErs3vlYk{zVnv!2Atu2IDRK?_Z3L%&jQ{HycRe^TNTOzIKy+{BET8e z!`%Vg_^z7q%{AkDY{s|S_zkxOJMeBd-Z^8q4^9Qn@Xv7Nz!|;)w*@%EysMxOaE8~w zxqutrVB@#hrr^E=I>U#rMi~G%zM01Ft?BQnF}!CN>iUF${lN2qGrYAH?;`>?zG-HB z@67lX8o!ZNbuGpm@H4y#t_L{7OuS__0-WI`aAUxYZ;=__DdYFcTpz)EhOY|vTj1ru zHFzERI`DG9o8Ybk-T~NzH_3d!jqj83J7s@_>js_Sb&Z%8Fc091O?Z0+IOdr&=)zb5 z9CK3oMKfr?F^?r5+*`nV02g^sN3Ven7=gaI7cj5L`QOtUYL9 zD|mrpJwkrCL%^|ip((igfMY#F7sK@c$J&R+;G)2>9-^!O`XF$uov0h`VcLfl~x}!mS35wJSXaR}LKOSz6YH`~$ZG_P|vE$9kAvhN}gRwKE0b 
zt^?iy_%NIYcnoms9)tmR0NxLGEASD(mF?)Kz_E`&`{25OW6y!o_o6I-V;_Qoa1R2< z8lL_FHvk;#dfEXu3LJX?6oVTB9tT|dF|;M%*h8QmxMRSvuRt%u#erk*gC^i6nIEv> z<0vcO*gK&=z|l0^K9nt77VrkZU&G}C$9@c5c^k?KIQDL6C3Y=KfZGB4;5Goq-Vc57 zc9a#<0Y3#-0~~utl<^6a6>#h)Q43rHaO^!%)_#-~aO_9X$KWE&54idclofF7XVL$F z>jaKHGOE54;lLe$QMfNL9kBY7C@ZD|M&U-74!Gk}C@bLDKcg7jQQ+9SqmOl>tbk)b zkKTZr0FJ#s`Z`=3IQ9eS19ze9fZG9o26qBD_7m~rbKrd)G6B;Lp+5r0ek45zmk%6! zm-yLB)CX_}U=*%|>44SuKpxWpqi`299dO&H(I0_hKb3w6=KzkqSL*0Oe+1qInDZI* zN8l=849*7}`?*wqFZu~^?ETVhpG6$z2lU}w+177CAJ7^4;jC{8cnJ7X;0(V2X9sS4bBy04du$Z_8gzyid>i=&ZhTwp2cUI+ z2Qolscnt1A;5c`t8y|)p1Kt7n#3RrXxbaP}b&n#iaQuE4!`jD@@3#bW11|*5a1UGw zaO1mR{9f42PomsFXZRVo2H?i`y^L>v@f%=WKSo`Hhv7qT1Hc*n5bj~%#y7tB?XUBm zLH&Zx@JhH7z>V*D@jG9id=`5CRlp;_R|6jb{LwMAVc`5G-XGw2tv3Z&{Zq7C;P`zT z?YAh#pBIpSZ({2AcuyGca}sCxB%BH!hQEVz0smY6osYz?fV&bd^KYrh`)U6jlmOa3 zw1DeKRtB~R1p6%X`4+4<5yrpy@|omFn(yMv+FN}po`d3(=Szy$mf4GOjRB87;j_X^ zig(vuUb>-J4M%Xh!HH|Z?n{dIy2HhnU7DS7u`?WYw>J9rsu180UsBu_@?V7481OJd zxU|*V6bgg`&5=^PU~`c(+*)??T0A=8^n07}3eGjg)L7IST^2mE==lFNDK6@~;jriM z=;5)$6JMSB>NNH`NKMh&Ldj6YkY}iSXl!VD$U3}XnCvNZ945>Dz=j8^9&kKR_kibt zkp~J7+Yx`_aQyJ(;f@D8AMAS2^R?jDDvnegaUAJ7(tV`oNc70)k+CD=M`B0fM<$O< z9hp92joPAxQ8j9hZirSytD<$$hNveRjCMpjqg~PNXf!$!9gU7fW6_CdJUSVjj#96+ z*Ve1{mh{?tH}qEZI(qAR8+wDi?Y$knoxR0W1@!nYPMDJwpRPS^z_1XFg z`_#UYz72g9eN}yqzJ@+eU$C#eud}bKue-0OZ=`RuZ>(>;Z=x^WH`zDUNB!1*TYq7H zNx!{+Lw`lTqra}dq2JTr-rv#R+27UQ(;w|0=^yPM?~nCQ^vCGss+$rk_x- 1: + single_letters = False + alphabet.append(key) + elif dims == 2: + for letter in key: + if not isinstance(letter, str): + raise ValueError("expected string") + if len(letter) > 1: + single_letters = False + alphabet.append(letter) + else: + raise ValueError( + "data array should be 1- or 2- dimensional " + "(found %d dimensions) in key" % dims + ) + alphabet = sorted(set(alphabet)) + if single_letters: + alphabet = "".join(alphabet) + else: + alphabet = tuple(alphabet) + n = len(alphabet) + if dims == 1: + shape = (n,) + elif dims == 2: + shape = (n, n) + else: # dims 
is None + raise ValueError("data is an empty dictionary") + obj = super().__new__(cls, shape, dtype) + if dims == 1: + for i, key in enumerate(alphabet): + obj[i] = data.get(letter, 0.0) + elif dims == 2: + for i1, letter1 in enumerate(alphabet): + for i2, letter2 in enumerate(alphabet): + key = (letter1, letter2) + value = data.get(key, 0.0) + obj[i1, i2] = value + obj._alphabet = alphabet + return obj + if alphabet is None: + alphabet = string.ascii_uppercase + elif not (isinstance(alphabet, str) or isinstance(alphabet, tuple)): + raise ValueError("alphabet should be a string or a tuple") + n = len(alphabet) + if data is None: + if dims is None: + dims = 1 + elif dims not in (1, 2): + raise ValueError("dims should be 1 or 2 (found %s)" % dims) + shape = (n,) * dims + else: + if dims is None: + shape = data.shape + dims = len(shape) + if dims == 1: + pass + elif dims == 2: + if shape[0] != shape[1]: + raise ValueError("data array is not square") + else: + raise ValueError( + "data array should be 1- or 2- dimensional " + "(found %d dimensions) " % dims + ) + else: + shape = (n,) * dims + if data.shape != shape: + raise ValueError( + "data shape has inconsistent shape (expected (%s), found (%s))" + % (shape, data.shape) + ) + obj = super().__new__(cls, shape, dtype) + if data is None: + obj[:] = 0.0 + else: + obj[:] = data + obj._alphabet = alphabet + return obj + + def __array_finalize__(self, obj): + if obj is None: + return + self._alphabet = getattr(obj, "_alphabet", None) + + def _convert_key(self, key): + if isinstance(key, tuple): + indices = [] + for index in key: + if isinstance(index, str): + try: + index = self._alphabet.index(index) + except ValueError: + raise IndexError("'%s'" % index) from None + indices.append(index) + key = tuple(indices) + elif isinstance(key, str): + try: + key = self._alphabet.index(key) + except ValueError: + raise IndexError("'%s'" % key) from None + return key + + def __getitem__(self, key): + key = self._convert_key(key) + 
value = numpy.ndarray.__getitem__(self, key) + if value.ndim == 2: + if self.ndim == 2: + if value.shape != self.shape: + raise IndexError("Requesting truncated array") + elif self.ndim == 1: + length = self.shape[0] + if value.shape[0] == length and value.shape[1] == 1: + pass + elif value.shape[0] == 1 and value.shape[1] == length: + pass + else: + raise IndexError("Requesting truncated array") + elif value.ndim == 1: + if value.shape[0] != self.shape[0]: + value._alphabet = self.alphabet[key] + return value.view(Array) + + def __setitem__(self, key, value): + key = self._convert_key(key) + numpy.ndarray.__setitem__(self, key, value) + + def __contains__(self, key): + # Follow dict definition of __contains__ + return key in self.keys() + + def __array_prepare__(self, out_arr, context=None): + # needed for numpy older than 1.13.0 + ufunc, inputs, i = context + alphabet = self.alphabet + for arg in inputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + return numpy.ndarray.__array_prepare__(self, out_arr, context) + + def __array_wrap__(self, out_arr, context=None): + if len(out_arr) == 1: + return out_arr[0] + return numpy.ndarray.__array_wrap__(self, out_arr, context) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + args = [] + alphabet = self._alphabet + for arg in inputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + args.append(arg.view(numpy.ndarray)) + else: + args.append(arg) + + outputs = kwargs.pop("out", None) + if outputs: + out_args = [] + for arg in outputs: + if isinstance(arg, Array): + if arg.alphabet != alphabet: + raise ValueError("alphabets are inconsistent") + out_args.append(arg.view(numpy.ndarray)) + else: + out_args.append(arg) + kwargs["out"] = tuple(out_args) + else: + outputs = (None,) * ufunc.nout + + raw_results = super().__array_ufunc__(ufunc, method, *args, **kwargs) + if raw_results is 
NotImplemented: + return NotImplemented + + if method == "at": + return + + if ufunc.nout == 1: + raw_results = (raw_results,) + + results = [] + for raw_result, output in zip(raw_results, outputs): + if raw_result.ndim == 0: + result = raw_result + elif output is None: + result = numpy.asarray(raw_result).view(Array) + result._alphabet = self._alphabet + else: + result = output + result._alphabet = self._alphabet + results.append(result) + + return results[0] if len(results) == 1 else results + + def __reduce__(self): + import pickle + + values = numpy.array(self) + state = pickle.dumps(values) + alphabet = self._alphabet + dims = len(self.shape) + dtype = self.dtype + arguments = (Array, alphabet, dims, None, dtype) + return (Array.__new__, arguments, state) + + def __setstate__(self, state): + import pickle + + self[:, :] = pickle.loads(state) + + def transpose(self, axes=None): + """Transpose the array.""" + other = numpy.ndarray.transpose(self, axes) + other._alphabet = self._alphabet + return other + + @property + def alphabet(self): + """Return the alphabet property.""" + return self._alphabet + + def copy(self): + """Create and return a copy of the array.""" + other = Array(alphabet=self._alphabet, data=self) + return other + + def get(self, key, value=None): + """Return the value of the key if found; return value otherwise.""" + try: + return self[key] + except IndexError: + return value + + def items(self): + """Return an iterator of (key, value) pairs in the array.""" + dims = len(self.shape) + if dims == 1: + for index, key in enumerate(self._alphabet): + value = numpy.ndarray.__getitem__(self, index) + yield key, value + elif dims == 2: + for i1, c1 in enumerate(self._alphabet): + for i2, c2 in enumerate(self._alphabet): + key = (c1, c2) + value = numpy.ndarray.__getitem__(self, (i1, i2)) + yield key, value + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def keys(self): + """Return a tuple with the keys associated with 
the array.""" + dims = len(self.shape) + alphabet = self._alphabet + if dims == 1: + return tuple(alphabet) + elif dims == 2: + return tuple((c1, c2) for c2 in alphabet for c1 in alphabet) + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def values(self): + """Return a tuple with the values stored in the array.""" + dims = len(self.shape) + alphabet = self._alphabet + if dims == 1: + return tuple(self) + elif dims == 2: + n1, n2 = self.shape + return tuple( + numpy.ndarray.__getitem__(self, (i1, i2)) + for i2 in range(n2) + for i1 in range(n1) + ) + else: + raise RuntimeError("array has unexpected shape %s" % self.shape) + + def update(self, E=None, **F): + """Update the array from dict/iterable E and F.""" + if E is not None: + try: + alphabet = E.keys() + except AttributeError: + for key, value in E: + self[key] = value + else: + for key in E: + self[key] = E[key] + for key in F: + self[key] = F[key] + + def select(self, alphabet): + """Subset the array by selecting the letters from the specified alphabet.""" + ii = [] + jj = [] + for i, key in enumerate(alphabet): + try: + j = self._alphabet.index(key) + except ValueError: + continue + ii.append(i) + jj.append(j) + dims = len(self.shape) + a = Array(alphabet, dims=dims) + ii = numpy.ix_(*[ii] * dims) + jj = numpy.ix_(*[jj] * dims) + a[ii] = numpy.ndarray.__getitem__(self, jj) + return a + + def _format_1D(self, fmt): + _alphabet = self._alphabet + n = len(_alphabet) + words = [None] * n + lines = [] + try: + header = self.header + except AttributeError: + pass + else: + for line in header: + line = "# %s\n" % line + lines.append(line) + maxwidth = 0 + for i, key in enumerate(_alphabet): + value = self[key] + word = fmt % value + width = len(word) + if width > maxwidth: + maxwidth = width + words[i] = word + fmt2 = " %" + str(maxwidth) + "s" + for letter, word in zip(_alphabet, words): + word = fmt2 % word + line = letter + word + "\n" + lines.append(line) + text = "".join(lines) + 
return text + + def _format_2D(self, fmt): + alphabet = self.alphabet + n = len(alphabet) + words = [[None] * n for _ in range(n)] + lines = [] + try: + header = self.header + except AttributeError: + pass + else: + for line in header: + line = "# %s\n" % line + lines.append(line) + width = max(len(c) for c in alphabet) + line = " " * width + for j, c2 in enumerate(alphabet): + maxwidth = 0 + for i, c1 in enumerate(alphabet): + key = (c1, c2) + value = self[key] + word = fmt % value + width = len(word) + if width > maxwidth: + maxwidth = width + words[i][j] = word + fmt2 = " %" + str(maxwidth) + "s" + word = fmt2 % c2 + line += word + for i, c1 in enumerate(alphabet): + word = words[i][j] + words[i][j] = fmt2 % word + line = line.rstrip() + "\n" + lines.append(line) + for letter, row in zip(alphabet, words): + line = letter + "".join(row) + "\n" + lines.append(line) + text = "".join(lines) + return text + + def __format__(self, fmt): + return self.format(fmt) + + def format(self, fmt=""): + """Return a string representation of the array. + + The argument ``fmt`` specifies the number format to be used. + By default, the number format is "%i" if the array contains integer + numbers, and "%.1f" otherwise. 
+ + """ + if fmt == "": + if numpy.issubdtype(self.dtype, numpy.integer): + fmt = "%i" + else: + fmt = "%.1f" + n = len(self.shape) + if n == 1: + return self._format_1D(fmt) + elif n == 2: + return self._format_2D(fmt) + else: + raise RuntimeError("Array has unexpected rank %d" % n) + + def __str__(self): + return self.format() + + def __repr__(self): + text = numpy.ndarray.__repr__(self) + alphabet = self._alphabet + if isinstance(alphabet, str): + assert text.endswith(")") + text = text[:-1] + ",\n alphabet='%s')" % self._alphabet + return text + + +def read(handle, dtype=float): + """Parse the file and return an Array object.""" + try: + fp = open(handle) + lines = fp.readlines() + except TypeError: + fp = handle + try: + lines = fp.readlines() + except Exception as e: + raise e from None + finally: + fp.close() + header = [] + for i, line in enumerate(lines): + if not line.startswith("#"): + break + header.append(line[1:].strip()) + rows = [line.split() for line in lines[i:]] + if len(rows[0]) == len(rows[1]) == 2: + alphabet = [key for key, value in rows] + for key in alphabet: + if len(key) > 1: + alphabet = tuple(alphabet) + break + else: + alphabet = "".join(alphabet) + matrix = Array(alphabet=alphabet, dims=1, dtype=dtype) + matrix.update(rows) + else: + alphabet = rows.pop(0) + for key in alphabet: + if len(key) > 1: + alphabet = tuple(alphabet) + break + else: + alphabet = "".join(alphabet) + matrix = Array(alphabet=alphabet, dims=2, dtype=dtype) + for letter1, row in zip(alphabet, rows): + assert letter1 == row.pop(0) + for letter2, word in zip(alphabet, row): + matrix[letter1, letter2] = float(word) + matrix.header = header + return matrix + + +def load(name=None): + """Load and return a precalculated substitution matrix. 
+ + >>> from Bio.Align import substitution_matrices + >>> names = substitution_matrices.load() + """ + path = os.path.realpath(__file__) + directory = os.path.dirname(path) + subdirectory = os.path.join(directory, "data") + if name is None: + filenames = os.listdir(subdirectory) + return sorted(filenames) + path = os.path.join(subdirectory, name) + matrix = read(path) + return matrix diff --git a/code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Align/substitution_matrices/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23f023bfa842ea601a5804e4c394f5b2088e9059 GIT binary patch literal 13211 zcmbVTU5p#ob)G*Chg>dqwOX$wS+;4}vPDH+OR{4-imb}EtR}9paBRhLwz4UTGrQu7 zOHy}cEiOBB3dydW)(O-+1Vx*syGB~1MG*8M=tG+3twEoPq9}@DUi%Uk0s0c8fl;9S zzH?`W+_jRRU3o5N?%aFsne%_ny?$02Fh*_2;v*^*y-+5Sz#w|(aw!*>FE&ADYS7Xo+1^b5ZGj=5a)OMcP!aOe5ft46&% zTzGZkN*J}Hji}x0*4CSmYPW)L@guBaEVbBXf5d=)p=%mOU|?l~t1jEV#eadb;=E^I zC9YmtF35_;vMcKuvU2$y1M5!t75tX`X}^l!vj4O{N zq316DLH{BAF8GK2hw*#2f7*YJ^TGn`N!~kz<<DLkuGb94A~cVoT3S!?p47H(W=b(&#V zYj^!%5cst#o4AdF)j)-{W>=m&K@{O}aj8|%WHt8S*TrR%#vQbg5!;b@%erl;r(^K> zT1g#@KZjf#9x`IfOY6&rCWcDC&i*5$Wj_P-2nMioNDmmIicjiyG0*xs6s zi~jVs6_?|w)5dM{`u8sy-Q^U%r4|QE`JCr4r!txzJel_#MNf4!6Hmu8z(GSR5ZY)q zo(058TwSpQ>c8Mc%Hdd1k4XH2U4UdPO8P$ya6ixZaTwOZfvjZ8I?fe66FT{ zq>`onxqA}#7C4Z`;ZN+&SXXt>A8NwnF#Fcn!jhOEP*9W9GpJ z^sJ<~M7z18zr|Ls`O|S3*3oxqQP7PreLcVfIxxE5?C#_*mT{cbH-??;>R6+oh?HdnEf{0$$z!}m8+ zl(=TI7H#x9LETa?U528!7;?QyLYxrfa(u$N5*qBA_!LmF+XIcSZ!}dfJUj7bO!oi7 zM-HD62;tRcKd4=8lB^Tt*4;;P0v|aX){oURwRm5z&n4w{m`Oq6kiALywPt4{ctNS2 zN^I~Xahv^q(DjpoP$028K{s*3o&wJ>AczuYt=H}*g>a)EsHE6vK)@P}q!hqu1PU^j z*sB;-YGfqFY+-dW(+pefc4GtZRjU~WfWa@R?@8=dCrrG20x(+-fgc1hDfrQ5A27k) zYPBM=>8d==&SfkPIwxdR4vA?P45a)1(-$!Fu71K-qIcCKy z!>%JRYz2zT&sUOed3hzS$i)~!UE|>=YMXqngHr zI_d5iS&dM?n?n^xJ^}TwCZL+TpqfIuq4UV$@R7Qw_~mMf4L?aJm=q*JQyw~!^5+?? 
ziCQbh;dMuSiUaTE%OVCcFjkMUYZt964G8{Qfr=VXxwkPOtf4U=2b3X7%B)#6=W$Iw z4d+t&PfXrH`CLT)3BY1w3xdHgCNkHoEeHpmEV|b<2d>&u-vr(D-E-(G;HiK>?TcSl-(h$-4p_@5tsz-o@;X zb;%ytBM0%Wv1W|i!I9X3d8r{cFp(S35G=&SH6wN*Rtpj0JBj)iU^1Mw!j?;wAQ`pV zAIN+o!ZL*@ZNl%@i&{3sA`nwC029>SlNryx3i_DcXuy8(>yu({BcfqViu~YU5G93; z6$owOw!8g}C{zy+93?NYn`(6zq0}k7_%5!nipCIELJ6Yt7~Y*pkcf68Upgn)4mo-V z^JyPpN2&eua2nQrx{usCq@=KyCe+AL#sMDO~ z7bID9<1$jsGH{$4_|XiKIq#MQOkdMUU|d<7^<5cxJg$tY-Q)2zg9D_Q7oy_Up187Y zf)*A$L6vF}R~t-frpHh56uzlS{E-+^Ip*RiNQqwk_u?tvrJTf5P?O@)@K>k_g2Rmn z)+n)16{7G-v|isUHdVV*Q690g)Mjy;>eIX%TQ0lb>xg=CY?XcQss|Qq3C;6@(j|C`^j!&2o9%o49&X;_8=mQ#}G$S{E2ysdH%l7FS5S zWx&ldIOIRioVOVKIjQSUdk_$otp$c%2sa%Dhn6FLgYytD;t64>HN0!5Dy7tE(FO8v zhsk^i9SjsZDE&m{mK7V22@7Qwcy-2JkPc_$pyXk!xrnFuTO7G4kD%b=!-%dFpm*L{ zDMswIqK&3za5-`N?bfwUAY4oeLVjcwsBndv_hLktKd6IjL<$(Kr&dxe;XZLCaR`yJ zAY=_ROAcgnk`nlY?57KpxzofX6~1jeVihYx-ek4_^Ts>EDekJRe-Dd|{*U zUl^78TyMhnjR)*NlFnJ2E{PHuYr?`bs6z;ia0wS(@NoWuYIehZFAQqYRTL-~`Ya+w z7%N#R%0>u-QnZ(y<{$tAdYGklVXiJ>)&C~KG|q-qr6r%yRLz0qf=9~m5_~ypP$)4Gv8+PUS3M-G5s*aL4x{lSvBOSIi%3`itD^;bSk=pCVDi-|Upcjy5yJ6- zUSOp8RSR`K3G{~#XZ0JDI+RALS!?xBmF=xe(#_JMcn|TiwCw8aO5^xC`ghSPaR{bJ zOo;}GH7sjf`67396g{e_Q0WkEHa*59G`C2C3?xAZb-F^nz7FgesSKZ)prEKshA*Nn z7MG+P@!9N?(h=n7jdqB_6llw6H|#{n8jcg+%VF=*jce&gV0>R59wNm;R(W& z4+mE!W}d^$0^+<=6Me_fSA-jPrTRLCiV$FBH)Ocy>#qZPH7dd}Ln3rl(sw}D!tkk- zuFY-@aWl#+C{mHMM?lwOHC@%H^_y+ffWZ*^I13A=;l=-T{Su!F2UT5 zEg|JnTgO~2C5cEXuWWQ9)b%8@te3Qui6N9+a?CJFOk%Z8sE72;$=zaDVEiBMB@_2y z2;)ci8?bR!e&^B7!T$`WG*e}C&elwcD5T-bXeQWdtXW@yph_fJi|qH2ol6;+EISj~ z`L>StzHtuk_!bcqJxWG$<_g6z$#pHTcB36#70NP!s9{H>*J?|8m%k7KFOEr`qf0cY zY|O*L8k*R6W8cUm0&i5A_Qqx^U*+pC<+MdhLa(}=Sc}KiB}`GjjRyK50gBijI`5B5 zr&j}VQ=K`Q86U?O-m8g5OczggdaY(BJhPaO9R4~{C`^a~&55owEE9n=aT1mjiwq-) zB%VgcgG6JT5$H14f|c1AqLkBxG~p#|yo{U3c;8$DOF$ch{tN(eHVUIs&0FHAOJFlJ z6%2h3)RUb$FM_U8k5jf{-;N_wP10qjw zM(kox-oVyJ4peK4*O_3bqvPuO~4byQB)(8%B=2*38y2p zZs5&Rq_q$c8T{~m&2;@8{~xxycVzozA|t#{Ye>R-is7==)oKl?XmXVJMZxY1cw8iQ 
z6d#=%YDr)LsbW^0VPz8AJeHt4rcM7XTw#nxB%q|;+{B&~6L~4aEy4qt@IcRL4>Z^( zrMp>JTjtwP<=8oGTt9lzKuPZ4>qhn-mK@0siZ}ZGix>IE0<&iE21!r;#=n^PvtwWE zqYOO}s?=6gZ(a5j9uq@+r4!U%kbThea9`MN1kOqIS(G)~S2m*5PP#&w#C#z!&uax9 z8%j~-T%QwnLl!(Urk$rL2Fo}H8_9xfMms$;r(Fq4^Lidv_!nqc*2HBGUe?JqzLZ`H zG?Ee|V>IlRj$C-11yIxmMbB!A-k8ganGW^`k$zzh(1~41Biv(rZGNz`zj`}Ww3%Ka8MnNbbKPLWj75RRoa ze@%Xl3$c&XUTL$umPTp}&?uYriW>VbaZ8jaozIP399@WL-m={uG^8*kE-rIwQueUg z?PY6iO&3DQv&3AJH4?MA+o8D{;g`rc*1-+rK@NPFE|{{&&Z=7l9GpdO#zQ?R=iwgA zBeufBpW_B@iIjv9fmO791JtyUvOC~wVY7_4mhf*r<=-jIKXC65?c1>R1Xu8&nJnC= zCaCjlSgfh<(JE-KtVeq8{pd{!H+srPQqgI5Q4a60%i(?Etv3fZ+CECD4-iDIu8G4L zn8$aC1864|nVmSSpY0Zja$}{ZupQJmaqdlu#S<85q&qGS1cM^sLa7p3U7;&M0gheI=<|~FoI|x_b!N$M#OcWysk0Zikj|xzJ3XcQj zKfg8-HkBXWXAfftExBiP?aKqugpaqoe(}e7b|}_EvAbf97q9 z7(u(n@FDG8Gx=2R@!?={*y(gK!nN%hsRhYF4U!lp)7*^*sSH+uGRoRU1K8}U(tS3?Du8%Qd!43pV&}XZT6}7D0Xqvxj$h5 z=5=(k&S?@ucX`)iBcS@c3)%3PfDXB@^X>sQ48F%!Kq)>(Y()8O)w`iK_((^dF^T*^ z0VvhkljnYi9XL*FOO!pU>FS)dq@ zlj29pxXHHMM5?d>?rOBMcwWdtxH)2k&EcvDJp4*+IXY%Sy}4XMinEHF0YVJt+37m(hbrgFTT4LzAA9 z@r(E#Z}^{iOs|g`puT_|hY$t1rkn)aQ8Ou*Yw6zwsXyt;UG4H^81u`Q^SC0EXarky zR|18y1hYk=Mi}^uIgsaY!X#K}qW*g94l{59vA(lSI!A-Wkgd{`G(dDr8 zYT+@7Cl4K7Jh5`9CiO0CUcEt z90S3L1vyzEMNjSs3nmi@z*`uo-ex1Qf4Y|7GCZC`U|E>H zO)e`7cr+<cLZ9wXaO2SvaJ$j?!Gvt)XB6a3z6fQKmn?D) zH(7-e#+fo{Bf>%k5b?a=S>zem*55Y6qfrqXHn7RQ^)9xfq$-2Wez*o*rE``(HtQe# z5B`SxkL5hf!_jACs~%+)l3Z_mdaMTbfwK-lnC!^@ed6K7O~^U|Qil*HQ7E;##FElo zjLSInAJhF^!1y~-fR@pKd0r0Ww>XJFz?d({}ZN1W0w8>nKoD;n#cq22R*C zxC@F?OP%x$Xzlk}=q{|FlpM)v7Ilp1A7{fPSaG9PeTK~`HfPz8*6ISA*VwR3n|N74 zRh!{2@dX{BdV>w~C-nt30h>0Pu_*p7A1P|;CL4ZyGMygfX$74QJ~lUE5F6R(5b>lv zluf%Qal>7Ug;lTM&3W^l<2mJWxllRbO?ic~TPdI|db4<%Emtw(UhfglF5l(ZQeDl> z+$`Ws;}VhjIT)}x&l(is7OBmriRzn(tSm%UaBKV*Ba|PWLMn^ueLCVu8n`1q966>j zEMs@hylEkNK?QV_yYj%$tCwqc_BM`W>SSHkho z5_cf|@>gvn8&f!9iXBlv{xJ5osy6xaB=c(^a9Bi+2lM*qNG%P7yz==Pxeo5(tK`ZFf8YYES z2YW^m;w5G5QK?7_reu8FmSeO>8>nlQ)OEcbB4&hIs$;?Tc=rLD4x0Kb(nl~1+k*%3 zm{=z$#=Xpec^X5UAdiAG(mvfN*gd7x` 
zM6a#gT*T9Bs7zv04h1mWGXyh{%UN)9*zCa8lZAc$vk1^|+!^m3?vVBxf-aPrTN@pzi%%Yk)c-TaU{S{~XU?3_g&drqT$B@(wf1@+ zXH5Bj8+`I0rTA9Pp*IR^&)j*`A~!jY)Mc>Zh8V(~#Oh%ez2Agm^}vLVzC&CKMTIvj z`fWv+QX;@9QDjo+QMBtq*}98!SbYCy=t)X=2QBWY&7{KcDSs*voZzOu$*H^9PsDyk z_OmcyxHxwy%+z_mXR7a@m%h2A{(#fJ#pW(HdE}r)S;Uqcml+ew$bvKgiFaCi@G~Bs TWIb-u= 45 +# Entropy = 0.3795, Expected = -0.2789 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 0 -5 +R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 0 -1 -5 +N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 4 0 -1 -5 +D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 5 1 -1 -5 +C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2 -5 +Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 4 -1 -5 +E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 4 -1 -5 +G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -5 +H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 0 -1 -5 +I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 -3 -1 -5 +L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 -2 -1 -5 +K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 1 -1 -5 +M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 -1 -1 -5 +F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 -3 -1 -5 +P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -1 -1 -5 +S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 0 0 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 0 -5 +W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2 -5 +Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 -2 -1 -5 +V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 -3 -1 -5 +B -1 -1 4 5 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 4 2 -1 -5 +Z -1 0 0 1 -3 4 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 2 4 -1 -5 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -2 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 
-5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 new file mode 100644 index 0000000..3f62e3c --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM50 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum50.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 50 +# Entropy = 0.4808, Expected = -0.3573 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1 -5 +R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1 -5 +N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1 -5 +D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1 -5 +C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2 -5 +Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1 -5 +E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1 -5 +G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2 -5 +H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1 -5 +I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1 -5 +L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1 -5 +K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1 -5 +M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1 -5 +F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2 -5 +P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2 -5 +S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1 -5 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0 -5 +W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3 -5 +Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1 -5 +V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1 -5 +B -2 -1 4 5 -3 0 1 -1 0 
-4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1 -5 +Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1 -5 +X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1 -5 +* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 new file mode 100644 index 0000000..205f139 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM62 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum62.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 62 +# Entropy = 0.6979, Expected = -0.5209 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 +N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 +L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 
-2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 +Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 +B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 +Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 +* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 new file mode 100644 index 0000000..78172a3 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM80 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum80_3.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 80 +# Entropy = 0.9868, Expected = -0.7442 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -8 +R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -8 +N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -8 +D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -8 +C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -8 +Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -8 +E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -8 +G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -8 +H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -8 +I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -8 +L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -8 +K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -8 +M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -8 +F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -8 
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -8 +S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -8 +T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -8 +W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -8 +Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -8 +V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -8 +B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -8 +Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -8 +X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 new file mode 100644 index 0000000..71441b5 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/BLOSUM90 @@ -0,0 +1,31 @@ +# Matrix made by matblas from blosum90.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 90 +# Entropy = 1.1806, Expected = -0.8887 + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -1 -1 -6 +R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 0 -2 -6 +N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 4 -1 -2 -6 +D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 4 0 -2 -6 +C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -5 -3 -6 +Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 4 -1 -6 +E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 0 4 -2 -6 +G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -3 -2 -6 +H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 0 -2 -6 +I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 -4 -2 -6 +L -2 -3 -4 -5 -2 -3 -4 -5 
-4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 -4 -2 -6 +K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 1 -1 -6 +M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 -2 -1 -6 +F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 -4 -2 -6 +P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -2 -2 -6 +S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -1 -1 -6 +T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -1 -1 -6 +W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -4 -3 -6 +Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -3 -2 -6 +V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 -3 -2 -6 +B -2 -2 4 4 -4 -1 0 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 4 0 -2 -6 +Z -1 0 -1 0 -5 4 4 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 4 -1 -6 +X -1 -2 -2 -2 -3 -1 -2 -2 -2 -2 -2 -1 -1 -2 -2 -1 -1 -3 -2 -2 -2 -1 -2 -6 +* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF b/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF new file mode 100644 index 0000000..e8aecac --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/DAYHOFF @@ -0,0 +1,27 @@ +# M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt: +# "A Model of Evolutionary Change in Proteins." +# Margaret O. Dayhoff: Atlas of Protein Sequence and Structure, +# Volume 5, Supplement 3, 1978, pages 345-352. +# The National Biomedical Research Foundation, 1979. +# Figure 84, page 352. 
+ A C D E F G H I K L M N P Q R S T V W Y +A 0.2 -0.2 0.0 0.0 -0.4 0.1 -0.1 -0.1 -0.1 -0.2 -0.1 0.0 0.1 0.0 -0.2 0.1 0.1 0.0 -0.6 -0.3 +C -0.2 1.2 -0.5 -0.5 -0.4 -0.3 -0.3 -0.2 -0.5 -0.6 -0.5 -0.4 -0.3 -0.5 -0.4 0.0 -0.2 -0.2 -0.8 0.0 +D 0.0 -0.5 0.4 0.3 -0.6 0.1 0.1 -0.2 0.0 -0.4 -0.3 0.2 -0.1 0.2 -0.1 0.0 0.0 -0.2 -0.7 -0.4 +E 0.0 -0.5 0.3 0.4 -0.5 0.0 0.1 -0.2 0.0 -0.3 -0.2 0.1 -0.1 0.2 -0.1 0.0 0.0 -0.2 -0.7 -0.4 +F -0.4 -0.4 -0.6 -0.5 0.9 -0.5 -0.2 0.1 -0.5 0.2 0.0 -0.4 -0.5 -0.5 -0.4 -0.3 -0.3 -0.1 0.0 0.7 +G 0.1 -0.3 0.1 0.0 -0.5 0.5 -0.2 -0.3 -0.2 -0.4 -0.3 0.0 -0.1 -0.1 -0.3 0.1 0.0 -0.1 -0.7 -0.5 +H -0.1 -0.3 0.1 0.1 -0.2 -0.2 0.6 -0.2 0.0 -0.2 -0.2 0.2 0.0 0.3 0.2 -0.1 -0.1 -0.2 -0.3 0.0 +I -0.1 -0.2 -0.2 -0.2 0.1 -0.3 -0.2 0.5 -0.2 0.2 0.2 -0.2 -0.2 -0.2 -0.2 -0.1 0.0 0.4 -0.5 -0.1 +K -0.1 -0.5 0.0 0.0 -0.5 -0.2 0.0 -0.2 0.5 -0.3 0.0 0.1 -0.1 0.1 0.3 0.0 0.0 -0.2 -0.3 -0.4 +L -0.2 -0.6 -0.4 -0.3 0.2 -0.4 -0.2 0.2 -0.3 0.6 0.4 -0.3 -0.3 -0.2 -0.3 -0.3 -0.2 0.2 -0.2 -0.1 +M -0.1 -0.5 -0.3 -0.2 0.0 -0.3 -0.2 0.2 0.0 0.4 0.6 -0.2 -0.2 -0.1 0.0 -0.2 -0.1 0.2 -0.4 -0.2 +N 0.0 -0.4 0.2 0.1 -0.4 0.0 0.2 -0.2 0.1 -0.3 -0.2 0.2 -0.1 0.1 0.0 0.1 0.0 -0.2 -0.4 -0.2 +P 0.1 -0.3 -0.1 -0.1 -0.5 -0.1 0.0 -0.2 -0.1 -0.3 -0.2 -0.1 0.6 0.0 0.0 0.1 0.0 -0.1 -0.6 -0.5 +Q 0.0 -0.5 0.2 0.2 -0.5 -0.1 0.3 -0.2 0.1 -0.2 -0.1 0.1 0.0 0.4 0.1 -0.1 -0.1 -0.2 -0.5 -0.4 +R -0.2 -0.4 -0.1 -0.1 -0.4 -0.3 0.2 -0.2 0.3 -0.3 0.0 0.0 0.0 0.1 0.6 0.0 -0.1 -0.2 0.2 -0.4 +S 0.1 0.0 0.0 0.0 -0.3 0.1 -0.1 -0.1 0.0 -0.3 -0.2 0.1 0.1 -0.1 0.0 0.2 0.1 -0.1 -0.2 -0.3 +T 0.1 -0.2 0.0 0.0 -0.3 0.0 -0.1 0.0 0.0 -0.2 -0.1 0.0 0.0 -0.1 -0.1 0.1 0.3 0.0 -0.5 -0.3 +V 0.0 -0.2 -0.2 -0.2 -0.1 -0.1 -0.2 0.4 -0.2 0.2 0.2 -0.2 -0.1 -0.2 -0.2 -0.1 0.0 0.4 -0.6 -0.2 +W -0.6 -0.8 -0.7 -0.7 0.0 -0.7 -0.3 -0.5 -0.3 -0.2 -0.4 -0.4 -0.6 -0.5 0.2 -0.2 -0.5 -0.6 1.7 0.0 +Y -0.3 0.0 -0.4 -0.4 0.7 -0.5 0.0 -0.1 -0.4 -0.1 -0.2 -0.2 -0.5 -0.4 -0.4 -0.3 -0.3 -0.2 0.0 1.0 diff --git 
a/code/lib/Bio/Align/substitution_matrices/data/FENG b/code/lib/Bio/Align/substitution_matrices/data/FENG new file mode 100644 index 0000000..ebd5c2d --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/FENG @@ -0,0 +1,26 @@ +# D.F. Feng, M.S. Johnson, R.F. Doolittle: +# "Aligning amino acid sequences: Comparison of commonly used methods." +# Journal of Molecular Evolution 21(2): 112-125 (1985). +# Table 1, upper triangle. +# PMID 6100188 + A C D E F G H I K L M N P Q R S T V W Y +A 6 2 4 4 2 5 2 2 3 2 2 3 5 3 2 5 5 5 2 2 +C 2 6 1 0 3 3 2 2 0 2 2 2 2 1 2 4 2 2 3 3 +D 4 1 6 5 1 4 3 1 3 1 0 5 2 4 2 3 2 3 0 2 +E 4 0 5 6 0 4 2 1 4 1 1 3 3 4 2 3 3 4 1 1 +F 2 3 1 0 6 1 2 4 0 4 2 1 2 1 1 3 1 4 3 5 +G 5 3 4 4 1 6 1 2 2 2 1 3 3 2 3 5 2 4 3 2 +H 2 2 3 2 2 1 6 1 3 3 1 4 3 4 4 3 2 1 1 3 +I 2 2 1 1 4 2 1 6 2 5 4 2 2 1 2 2 3 5 2 3 +K 3 0 3 4 0 2 3 2 6 2 2 4 2 4 5 3 4 3 1 1 +L 2 2 1 1 4 2 3 5 2 6 5 1 3 2 2 2 2 5 4 3 +M 2 2 0 1 2 1 1 4 2 5 6 1 2 2 2 1 3 4 3 2 +N 3 2 5 3 1 3 4 2 4 1 1 6 2 3 2 5 4 2 0 3 +P 5 2 2 3 2 3 3 2 2 3 2 2 6 3 3 4 4 3 2 2 +Q 3 1 4 4 1 2 4 1 4 2 2 3 3 6 3 3 3 2 1 2 +R 2 2 2 2 1 3 4 2 5 2 2 2 3 3 6 3 3 2 2 1 +S 5 4 3 3 3 5 3 2 3 2 1 5 4 3 3 6 5 2 2 3 +T 5 2 2 3 1 2 2 3 4 2 3 4 4 3 3 5 6 3 1 2 +V 5 2 3 4 4 4 1 5 3 5 4 2 3 2 2 2 3 6 3 3 +W 2 3 0 1 3 3 1 2 1 4 3 0 2 1 2 2 1 3 6 3 +Y 2 3 2 1 5 2 3 3 1 3 2 3 2 2 1 3 2 3 3 6 diff --git a/code/lib/Bio/Align/substitution_matrices/data/GENETIC b/code/lib/Bio/Align/substitution_matrices/data/GENETIC new file mode 100644 index 0000000..79fc69b --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/GENETIC @@ -0,0 +1,27 @@ +# S.A. Benner, M.A. Cohen, G.H. Gonnet: +# "Amino acid substitution during functionally constrained divergent evolution +# of protein sequences." +# Figure 5. +# Protein Engineering 7(11): 1323-1332 (1994). 
+# PMID 7700864 + A C D E F G H I K L M N P Q R S T V W Y +A 4.0 -1.9 1.0 1.3 -2.4 1.2 -2.1 -1.8 -1.9 -2.3 -2.0 -1.7 0.8 -2.1 -1.6 0.1 0.9 1.0 -2.2 -2.4 +C -1.9 5.5 -1.6 -3.0 1.8 1.0 -1.6 -1.9 -3.2 -1.3 -2.7 -1.5 -1.9 -3.1 0.7 1.5 -1.9 -2.2 4.1 2.6 +D 1.0 -1.6 4.8 3.8 -1.7 1.1 1.7 -2.1 0.3 -2.4 -2.5 1.7 -2.2 0.3 -2.3 -2.1 -2.1 1.0 -2.9 2.3 +E 1.3 -3.0 3.8 5.7 -2.9 1.4 0.3 -2.3 2.0 -2.5 -1.8 0.3 -2.1 2.0 -2.0 -2.8 -2.1 1.3 -3.2 -0.9 +F -2.4 1.8 -1.7 -2.9 4.5 -1.9 -1.1 1.3 -2.8 2.2 0.5 -1.3 -1.8 -2.1 -1.5 0.0 -2.1 1.0 0.0 2.0 +G 1.2 1.0 1.1 1.4 -1.9 4.2 -2.2 -2.5 -2.2 -2.2 -2.3 -2.6 -1.8 -2.1 0.8 -0.6 -2.1 1.1 1.4 -1.8 +H -2.1 -1.6 1.7 0.3 -1.1 -2.2 4.7 -1.8 0.6 -0.1 -1.8 1.8 0.7 3.6 3.6 -1.6 -1.8 -2.1 -2.1 2.3 +I -1.8 -1.9 -2.1 -2.3 1.3 -2.5 -1.8 4.1 0.7 1.2 3.3 0.9 -1.6 -1.9 -1.2 -0.5 0.8 1.0 -2.2 -1.6 +K -1.9 -3.2 0.3 2.0 -2.8 -2.2 0.6 0.7 5.6 -2.0 1.6 3.5 -1.5 2.2 -0.2 -1.5 1.0 -2.1 -3.0 -0.8 +L -2.3 -1.3 -2.4 -2.5 2.2 -2.2 -0.1 1.2 -2.0 3.4 1.5 -2.2 0.0 0.1 -0.4 -1.2 -1.9 1.1 -0.3 -1.6 +M -2.0 -2.7 -2.5 -1.8 0.5 -2.3 -1.8 3.3 1.6 1.5 5.4 0.1 -1.4 -1.2 -0.4 -1.3 0.7 1.0 -2.0 -2.9 +N -1.7 -1.5 1.7 0.3 -1.3 -2.6 1.8 0.9 3.5 -2.2 0.1 4.7 -1.6 0.4 -1.5 -0.3 0.9 -2.2 -3.0 2.5 +P 0.8 -1.9 -2.2 -2.1 -1.8 -1.8 0.7 -1.6 -1.5 0.0 -1.4 -1.6 3.8 1.0 0.3 0.4 1.1 -2.1 -1.6 -2.3 +Q -2.1 -3.1 0.3 2.0 -2.1 -2.1 3.6 -1.9 2.2 0.1 -1.2 0.4 1.0 5.5 0.3 -2.3 -1.7 -2.0 -2.3 -0.8 +R -1.6 0.7 -2.3 -2.0 -1.5 0.8 3.6 -1.2 -0.2 -0.4 -0.4 -1.5 0.3 0.3 2.9 0.3 -0.6 -2.1 1.8 -1.9 +S 0.1 1.5 -2.1 -2.8 0.0 -0.6 -1.6 -0.5 -1.5 -1.2 -1.3 -0.3 0.4 -2.3 0.3 2.6 1.0 -2.2 0.8 0.3 +T 0.9 -1.9 -2.1 -2.1 -2.1 -2.1 -1.8 0.8 1.0 -1.9 0.7 0.9 1.1 -1.7 -0.6 1.0 4.0 -2.2 -2.2 -2.1 +V 1.0 -2.2 1.0 1.3 1.0 1.1 -2.1 1.0 -2.1 1.1 1.0 -2.2 -2.1 -2.0 -2.1 -2.2 -2.2 4.1 -2.1 -2.2 +W -2.2 4.1 -2.9 -3.2 0.0 1.4 -2.1 -2.2 -3.0 -0.3 -2.0 -3.0 -1.6 -2.3 1.8 0.8 -2.2 -2.1 7.5 -0.5 +Y -2.4 2.6 2.3 -0.9 2.0 -1.8 2.3 -1.6 -0.8 -1.6 -2.9 2.5 -2.3 -0.8 -1.9 0.3 -2.1 -2.2 -0.5 6.5 diff --git 
a/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 b/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 new file mode 100644 index 0000000..ac4e821 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/GONNET1992 @@ -0,0 +1,26 @@ +# Gaston H. Gonnet, Mark A. Cohen, Steven A. Benner: +# "Exhaustive matching of the entire protein sequence database." +# Science 256(5062): 1443-1445 (1992). +# Figure 2. +# PMID 1604319 + A C D E F G H I K L M N P Q R S T V W Y +A 2.4 0.5 -0.3 0.0 -2.3 0.5 -0.8 -0.8 -0.4 -1.2 -0.7 -0.3 0.3 -0.2 -0.6 1.1 0.6 0.1 -3.6 -2.2 +C 0.5 11.5 -3.2 -3.0 -0.8 -2.0 -1.3 -1.1 -2.8 -1.5 -0.9 -1.8 -3.1 -2.4 -2.2 0.1 -0.5 0.0 -1.0 -0.5 +D -0.3 -3.2 4.7 2.7 -4.5 0.1 0.4 -3.8 0.5 -4.0 -3.0 2.2 -0.7 0.9 -0.3 0.5 0.0 -2.9 -5.2 -2.8 +E 0.0 -3.0 2.7 3.6 -3.9 -0.8 0.4 -2.7 1.2 -2.8 -2.0 0.9 -0.5 1.7 0.4 0.2 -0.1 -1.9 -4.3 -2.7 +F -2.3 -0.8 -4.5 -3.9 7.0 -5.2 -0.1 1.0 -3.3 2.0 1.6 -3.1 -3.8 -2.6 -3.2 -2.8 -2.2 0.1 3.6 5.1 +G 0.5 -2.0 0.1 -0.8 -5.2 6.6 -1.4 -4.5 -1.1 -4.4 -3.5 0.4 -1.6 -1.0 -1.0 0.4 -1.1 -3.3 -4.0 -4.0 +H -0.8 -1.3 0.4 0.4 -0.1 -1.4 6.0 -2.2 0.6 -1.9 -1.3 1.2 -1.1 1.2 0.6 -0.2 -0.3 -2.0 -0.8 2.2 +I -0.8 -1.1 -3.8 -2.7 1.0 -4.5 -2.2 4.0 -2.1 2.8 2.5 -2.8 -2.6 -1.9 -2.4 -1.8 -0.6 3.1 -1.8 -0.7 +K -0.4 -2.8 0.5 1.2 -3.3 -1.1 0.6 -2.1 3.2 -2.1 -1.4 0.8 -0.6 1.5 2.7 0.1 0.1 -1.7 -3.5 -2.1 +L -1.2 -1.5 -4.0 -2.8 2.0 -4.4 -1.9 2.8 -2.1 4.0 2.8 -3.0 -2.3 -1.6 -2.2 -2.1 -1.3 1.8 -0.7 0.0 +M -0.7 -0.9 -3.0 -2.0 1.6 -3.5 -1.3 2.5 -1.4 2.8 4.3 -2.2 -2.4 -1.0 -1.7 -1.4 -0.6 1.6 -1.0 -0.2 +N -0.3 -1.8 2.2 0.9 -3.1 0.4 1.2 -2.8 0.8 -3.0 -2.2 3.8 -0.9 0.7 0.3 0.9 0.5 -2.2 -3.6 -1.4 +P 0.3 -3.1 -0.7 -0.5 -3.8 -1.6 -1.1 -2.6 -0.6 -2.3 -2.4 -0.9 7.6 -0.2 -0.9 0.4 0.1 -1.8 -5.0 -3.1 +Q -0.2 -2.4 0.9 1.7 -2.6 -1.0 1.2 -1.9 1.5 -1.6 -1.0 0.7 -0.2 2.7 1.5 0.2 0.0 -1.5 -2.7 -1.7 +R -0.6 -2.2 -0.3 0.4 -3.2 -1.0 0.6 -2.4 2.7 -2.2 -1.7 0.3 -0.9 1.5 4.7 -0.2 -0.2 -2.0 -1.6 -1.8 +S 1.1 0.1 0.5 0.2 -2.8 0.4 -0.2 -1.8 0.1 -2.1 -1.4 0.9 0.4 0.2 -0.2 
2.2 1.5 -1.0 -3.3 -1.9 +T 0.6 -0.5 0.0 -0.1 -2.2 -1.1 -0.3 -0.6 0.1 -1.3 -0.6 0.5 0.1 0.0 -0.2 1.5 2.5 0.0 -3.5 -1.9 +V 0.1 0.0 -2.9 -1.9 0.1 -3.3 -2.0 3.1 -1.7 1.8 1.6 -2.2 -1.8 -1.5 -2.0 -1.0 0.0 3.4 -2.6 -1.1 +W -3.6 -1.0 -5.2 -4.3 3.6 -4.0 -0.8 -1.8 -3.5 -0.7 -1.0 -3.6 -5.0 -2.7 -1.6 -3.3 -3.5 -2.6 14.2 4.1 +Y -2.2 -0.5 -2.8 -2.7 5.1 -4.0 2.2 -0.7 -2.1 0.0 -0.2 -1.4 -3.1 -1.7 -1.8 -1.9 -1.9 -1.1 4.1 7.8 diff --git a/code/lib/Bio/Align/substitution_matrices/data/HOXD70 b/code/lib/Bio/Align/substitution_matrices/data/HOXD70 new file mode 100644 index 0000000..4cbd0f6 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/HOXD70 @@ -0,0 +1,9 @@ +# F. Chiaromonte, V.B. Yap, W. Miller: +# "Scoring pairwise genomic sequence alignments" +# Pacific Symposium on Biocomputing 2002: 115-26 (2002). +# PMID 11928468 + A C G T +A 91 -114 -31 -123 +C -114 100 -125 -31 +G -31 -125 100 -114 +T -123 -31 -114 91 diff --git a/code/lib/Bio/Align/substitution_matrices/data/JOHNSON b/code/lib/Bio/Align/substitution_matrices/data/JOHNSON new file mode 100644 index 0000000..7d30964 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/JOHNSON @@ -0,0 +1,27 @@ +# Mark S. Johnson and John P. Overington: +# "A structural basis for sequence comparisons. An evaluation of scoring +# methodologies." +# Journal of Molecular Biology 233(4): 716-738 (1993). +# Table 3, upper triangle. 
+# PMID 8411177 + A C D E F G H I K L M N P Q R S T V W Y +A 0.60 -0.34 -0.16 -0.07 -0.32 -0.05 -0.31 -0.22 -0.09 -0.33 -0.15 -0.14 -0.10 -0.06 -0.16 0.00 -0.08 -0.05 -0.58 -0.40 +C -0.34 1.61 -0.97 -0.69 -0.44 -0.82 -0.82 -0.77 -0.87 -0.87 -0.44 -0.76 -0.89 -0.69 -0.56 -0.77 -0.60 -0.48 -0.91 -0.77 +D -0.16 -0.97 0.85 0.24 -0.70 -0.21 -0.07 -0.48 -0.15 -0.80 -0.59 0.26 -0.10 -0.11 -0.34 -0.02 -0.18 -0.52 -0.60 -0.38 +E -0.07 -0.69 0.24 0.86 -0.64 -0.25 -0.23 -0.48 0.11 -0.56 -0.28 -0.07 -0.15 0.24 -0.02 -0.22 -0.05 -0.42 -0.76 -0.37 +F -0.32 -0.44 -0.70 -0.64 1.04 -0.86 -0.17 0.05 -0.56 0.18 -0.06 -0.38 -0.50 -0.64 -0.60 -0.48 -0.50 -0.13 0.34 0.34 +G -0.05 -0.82 -0.21 -0.25 -0.86 0.80 -0.32 -0.55 -0.35 -0.72 -0.52 -0.14 -0.25 -0.28 -0.28 -0.13 -0.38 -0.56 -0.63 -0.54 +H -0.31 -0.82 -0.07 -0.23 -0.17 -0.32 1.27 -0.51 0.01 -0.42 -0.23 0.17 -0.43 0.14 0.01 -0.26 -0.30 -0.39 -0.40 -0.04 +I -0.22 -0.77 -0.48 -0.48 0.05 -0.55 -0.51 0.81 -0.47 0.26 0.26 -0.47 -0.57 -0.70 -0.54 -0.47 -0.32 0.39 -0.33 -0.25 +K -0.09 -0.87 -0.15 0.11 -0.56 -0.35 0.01 -0.47 0.76 -0.34 -0.19 0.01 -0.06 0.11 0.32 -0.15 -0.02 -0.37 -0.54 -0.37 +L -0.33 -0.87 -0.80 -0.56 0.18 -0.72 -0.42 0.26 -0.34 0.73 0.44 -0.48 -0.28 -0.44 -0.37 -0.52 -0.46 0.18 -0.10 -0.24 +M -0.15 -0.44 -0.59 -0.28 -0.06 -0.52 -0.23 0.26 -0.19 0.44 1.12 -0.37 -0.98 -0.06 -0.42 -0.48 -0.32 0.07 -0.09 -0.13 +N -0.14 -0.76 0.26 -0.07 -0.38 -0.14 0.17 -0.47 0.01 -0.48 -0.37 0.80 -0.24 -0.08 -0.15 0.10 0.01 -0.57 -0.61 -0.13 +P -0.10 -0.89 -0.10 -0.15 -0.50 -0.25 -0.43 -0.57 -0.06 -0.28 -0.98 -0.24 1.03 -0.36 -0.36 -0.10 -0.20 -0.52 -0.74 -0.70 +Q -0.06 -0.69 -0.11 0.24 -0.64 -0.28 0.14 -0.70 0.11 -0.44 -0.06 -0.08 -0.36 0.90 0.21 -0.12 -0.04 -0.36 -0.82 -0.51 +R -0.16 -0.56 -0.34 -0.02 -0.60 -0.28 0.01 -0.54 0.32 -0.37 -0.42 -0.15 -0.36 0.21 1.00 -0.06 -0.14 -0.49 -0.38 -0.21 +S 0.00 -0.77 -0.02 -0.22 -0.48 -0.13 -0.26 -0.47 -0.15 -0.52 -0.48 0.10 -0.10 -0.12 -0.06 0.58 0.20 -0.43 -0.62 -0.34 +T -0.08 -0.60 -0.18 -0.05 -0.50 
-0.38 -0.30 -0.32 -0.02 -0.46 -0.32 0.01 -0.20 -0.04 -0.14 0.20 0.68 -0.19 -0.93 -0.27 +V -0.05 -0.48 -0.52 -0.42 -0.13 -0.56 -0.39 0.39 -0.37 0.18 0.07 -0.57 -0.52 -0.36 -0.49 -0.43 -0.19 0.70 -0.49 -0.18 +W -0.58 -0.91 -0.60 -0.76 0.34 -0.63 -0.40 -0.33 -0.54 -0.10 -0.09 -0.61 -0.74 -0.82 -0.38 -0.62 -0.93 -0.49 1.52 0.23 +Y -0.40 -0.77 -0.38 -0.37 0.34 -0.54 -0.04 -0.25 -0.37 -0.24 -0.13 -0.13 -0.70 -0.51 -0.21 -0.34 -0.27 -0.18 0.23 1.05 diff --git a/code/lib/Bio/Align/substitution_matrices/data/JONES b/code/lib/Bio/Align/substitution_matrices/data/JONES new file mode 100644 index 0000000..daed995 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/JONES @@ -0,0 +1,26 @@ +# David T. Jones, William R. Taylor, Janet M. Thornton: +# "The rapid generation of mutation data matrices from protein sequences." +# Computer Applications in the Biosciences: CABIOS 8(3): 275-282 (1992). +# Table I, lower triangle. +# PMID 1633570 + A R N D C Q E G H I L K M F P S T W Y V +A 0.2 -0.1 0.0 0.0 -0.1 -0.1 -0.1 0.1 -0.2 0.0 -0.1 -0.1 -0.1 -0.3 0.1 0.1 0.2 -0.4 -0.3 0.1 +R -0.1 0.5 0.0 -0.1 -0.1 0.2 0.0 0.0 0.2 -0.3 -0.3 0.4 -0.2 -0.4 -0.1 -0.1 -0.1 0.0 -0.2 -0.3 +N 0.0 0.0 0.3 0.2 -0.1 0.0 0.1 0.0 0.1 -0.2 -0.3 0.1 -0.2 -0.3 -0.1 0.1 0.1 -0.5 -0.1 -0.2 +D 0.0 -0.1 0.2 0.5 -0.3 0.1 0.4 0.1 0.0 -0.3 -0.4 0.0 -0.3 -0.5 -0.2 0.0 -0.1 -0.5 -0.2 -0.2 +C -0.1 -0.1 -0.1 -0.3 1.1 -0.3 -0.4 -0.1 0.0 -0.2 -0.3 -0.3 -0.2 0.0 -0.2 0.1 -0.1 0.1 0.2 -0.2 +Q -0.1 0.2 0.0 0.1 -0.3 0.5 0.2 -0.1 0.2 -0.3 -0.2 0.2 -0.2 -0.4 0.0 -0.1 -0.1 -0.3 -0.2 -0.3 +E -0.1 0.0 0.1 0.4 -0.4 0.2 0.5 0.0 0.0 -0.3 -0.4 0.1 -0.3 -0.5 -0.2 -0.1 -0.1 -0.5 -0.4 -0.2 +G 0.1 0.0 0.0 0.1 -0.1 -0.1 0.0 0.5 -0.2 -0.3 -0.4 -0.1 -0.3 -0.5 -0.1 0.1 -0.1 -0.2 -0.4 -0.2 +H -0.2 0.2 0.1 0.0 0.0 0.2 0.0 -0.2 0.6 -0.3 -0.2 0.1 -0.2 0.0 0.0 -0.1 -0.1 -0.3 0.4 -0.3 +I 0.0 -0.3 -0.2 -0.3 -0.2 -0.3 -0.3 -0.3 -0.3 0.4 0.2 -0.3 0.3 0.0 -0.2 -0.1 0.1 -0.4 -0.2 0.4 +L -0.1 -0.3 -0.3 -0.4 -0.3 -0.2 -0.4 -0.4 -0.2 0.2 0.5 -0.3 
0.3 0.2 0.0 -0.2 -0.1 -0.2 -0.1 0.2 +K -0.1 0.4 0.1 0.0 -0.3 0.2 0.1 -0.1 0.1 -0.3 -0.3 0.5 -0.2 -0.5 -0.2 -0.1 -0.1 -0.3 -0.3 -0.3 +M -0.1 -0.2 -0.2 -0.3 -0.2 -0.2 -0.3 -0.3 -0.2 0.3 0.3 -0.2 0.6 0.0 -0.2 -0.1 0.0 -0.3 -0.2 0.2 +F -0.3 -0.4 -0.3 -0.5 0.0 -0.4 -0.5 -0.5 0.0 0.0 0.2 -0.5 0.0 0.8 -0.3 -0.2 -0.2 -0.1 0.5 0.0 +P 0.1 -0.1 -0.1 -0.2 -0.2 0.0 -0.2 -0.1 0.0 -0.2 0.0 -0.2 -0.2 -0.3 0.6 0.1 0.1 -0.4 -0.3 -0.1 +S 0.1 -0.1 0.1 0.0 0.1 -0.1 -0.1 0.1 -0.1 -0.1 -0.2 -0.1 -0.1 -0.2 0.1 0.2 0.1 -0.3 -0.1 -0.1 +T 0.2 -0.1 0.1 -0.1 -0.1 -0.1 -0.1 -0.1 -0.1 0.1 -0.1 -0.1 0.0 -0.2 0.1 0.1 0.2 -0.4 -0.3 0.0 +W -0.4 0.0 -0.5 -0.5 0.1 -0.3 -0.5 -0.2 -0.3 -0.4 -0.2 -0.3 -0.3 -0.1 -0.4 -0.3 -0.4 1.5 0.0 -0.3 +Y -0.3 -0.2 -0.1 -0.2 0.2 -0.2 -0.4 -0.4 0.4 -0.2 -0.1 -0.3 -0.2 0.5 -0.3 -0.1 -0.3 0.0 0.9 -0.3 +V 0.1 -0.3 -0.2 -0.2 -0.2 -0.3 -0.2 -0.2 -0.3 0.4 0.2 -0.3 0.2 0.0 -0.1 -0.1 0.0 -0.3 -0.3 0.4 diff --git a/code/lib/Bio/Align/substitution_matrices/data/LEVIN b/code/lib/Bio/Align/substitution_matrices/data/LEVIN new file mode 100644 index 0000000..2f9c8c4 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/LEVIN @@ -0,0 +1,27 @@ +# Jonathan M. Levin, Barry Robson, Jean Garnier: +# "An algorithm for secondary structure determination in proteins based on +# sequence similarity." +# FEBS Letters 205(2): 303-308 (1986). +# Figure 1. 
+# PMID 3743779 + A C D E F G H I K L M N P Q R S T V W Y +A 2 0 0 1 -1 0 0 0 0 0 0 0 -1 0 0 1 0 0 -1 -1 +C 0 2 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 +D 0 0 2 1 -1 0 0 -1 0 -1 -1 1 0 0 0 0 0 -1 -1 -1 +E 1 0 1 2 -1 0 0 -1 0 -1 -1 0 -1 1 0 0 0 -1 -1 -1 +F -1 -1 -1 -1 2 -1 -1 1 -1 0 0 -1 -1 -1 -1 -1 -1 0 0 1 +G 0 0 0 0 -1 2 0 -1 0 -1 -1 0 0 0 0 0 0 -1 -1 -1 +H 0 0 0 0 -1 0 2 -1 0 -1 -1 0 0 0 0 0 0 -1 -1 0 +I 0 0 -1 -1 1 -1 -1 2 -1 0 0 -1 -1 -1 -1 -1 0 1 0 0 +K 0 0 0 0 -1 0 0 -1 2 -1 -1 1 0 0 1 0 0 -1 -1 -1 +L 0 0 -1 -1 0 -1 -1 0 -1 2 2 -1 -1 -1 -1 -1 0 1 0 0 +M 0 0 -1 -1 0 -1 -1 0 -1 2 2 -1 -1 -1 -1 -1 0 0 0 0 +N 0 0 1 0 -1 0 0 -1 1 -1 -1 3 0 1 0 0 0 -1 -1 -1 +P -1 0 0 -1 -1 0 0 -1 0 -1 -1 0 3 0 0 0 0 -1 -1 -1 +Q 0 0 0 1 -1 0 0 -1 0 -1 -1 1 0 2 0 0 0 -1 -1 -1 +R 0 0 0 0 -1 0 0 -1 1 -1 -1 0 0 0 2 0 0 -1 0 -1 +S 1 0 0 0 -1 0 0 -1 0 -1 -1 0 0 0 0 2 0 -1 -1 -1 +T 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 -1 +V 0 0 -1 -1 0 -1 -1 1 -1 1 0 -1 -1 -1 -1 -1 0 2 0 0 +W -1 -1 -1 -1 0 -1 -1 0 -1 0 0 -1 -1 -1 0 -1 -1 0 2 0 +Y -1 -1 -1 -1 1 -1 0 0 -1 0 0 -1 -1 -1 -1 -1 -1 0 0 2 diff --git a/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN b/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN new file mode 100644 index 0000000..adf81ce --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/MCLACHLAN @@ -0,0 +1,27 @@ +# A.D. McLachlan: +# "Tests for comparing related amino-acid sequences. Cytochrome c and +# cytochrome c 551." +# Journal of Molecular Biology 61(2): 409-424 (1971). +# Figure 1. 
+# PMID 5167087 + A C D E F G H I K L M N P Q R S T V W Y +A 8 1 3 4 1 3 3 2 3 2 3 3 4 3 2 4 3 3 1 1 +C 1 9 1 0 0 1 3 1 0 0 3 1 0 0 1 2 2 1 2 1 +D 3 1 8 5 1 3 4 1 3 1 2 5 3 4 1 3 3 1 0 1 +E 4 0 5 8 0 3 2 1 4 1 1 4 4 5 3 4 4 2 1 2 +F 1 0 1 0 9 0 4 3 0 5 5 0 1 0 1 2 1 3 6 6 +G 3 1 3 3 0 8 2 1 3 1 1 3 3 2 3 3 2 2 1 0 +H 3 3 4 2 4 2 8 2 4 2 3 4 3 4 5 3 4 2 3 4 +I 2 1 1 1 3 1 2 8 1 5 5 1 1 0 1 2 3 5 3 3 +K 3 0 3 4 0 3 4 1 8 2 1 4 3 4 5 3 3 2 1 1 +L 2 0 1 1 5 1 2 5 2 8 6 1 1 3 2 2 3 5 3 3 +M 3 3 2 1 5 1 3 5 1 6 8 2 1 3 1 2 3 4 1 2 +N 3 1 5 4 0 3 4 1 4 1 2 8 1 4 3 5 3 1 0 2 +P 4 0 3 4 1 3 3 1 3 1 1 1 8 3 3 3 3 2 0 0 +Q 3 0 4 5 0 2 4 0 4 3 3 4 3 8 5 4 3 2 2 1 +R 2 1 1 3 1 3 5 1 5 2 1 3 3 5 8 4 3 2 3 2 +S 4 2 3 4 2 3 3 2 3 2 2 5 3 4 4 8 5 2 3 3 +T 3 2 3 4 1 2 4 3 3 3 3 3 3 3 3 5 8 3 2 1 +V 3 1 1 2 3 2 2 5 2 5 4 1 2 2 2 2 3 8 2 3 +W 1 2 0 1 6 1 3 3 1 3 1 0 0 2 3 3 2 2 9 6 +Y 1 1 1 2 6 0 4 3 1 3 2 2 0 1 2 3 1 3 6 9 diff --git a/code/lib/Bio/Align/substitution_matrices/data/MDM78 b/code/lib/Bio/Align/substitution_matrices/data/MDM78 new file mode 100644 index 0000000..5d0b2ef --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/MDM78 @@ -0,0 +1,27 @@ +# R.M. Schwartz and M.O. Dayhoff: +# "Matrices for Detecting Distant Relationships." +# Margaret O. Dayhoff: Atlas of Protein Sequence and Structure, +# Volume 5, Supplement 3, 1978, pages 353-358. +# The National Biomedical Research Foundation, 1979. +# Figure 85, page 354. 
+ A R N D C Q E G H I L K M F P S T W Y V +A 0.18 -0.15 0.02 0.03 -0.20 -0.04 0.03 0.13 -0.14 -0.05 -0.19 -0.12 -0.11 -0.35 0.11 0.11 0.12 -0.58 -0.35 0.02 +R -0.15 0.61 0.00 -0.13 -0.36 0.13 -0.11 -0.26 0.16 -0.20 -0.30 0.34 -0.04 -0.45 -0.02 -0.03 -0.09 0.22 -0.42 -0.25 +N 0.02 0.00 0.20 0.21 -0.36 0.08 0.14 0.03 0.16 -0.18 -0.29 0.10 -0.17 -0.35 -0.05 0.07 0.04 -0.42 -0.21 -0.17 +D 0.03 -0.13 0.21 0.39 -0.51 0.16 0.34 0.06 0.07 -0.24 -0.40 0.01 -0.26 -0.56 -0.10 0.03 -0.01 -0.68 -0.43 -0.21 +C -0.20 -0.36 -0.36 -0.51 1.19 -0.54 -0.53 -0.34 -0.34 -0.23 -0.60 -0.54 -0.52 -0.43 -0.28 0.00 -0.22 -0.78 0.03 -0.19 +Q -0.04 0.13 0.08 0.16 -0.54 0.40 0.25 -0.12 0.29 -0.20 -0.18 0.07 -0.10 -0.47 0.02 -0.05 -0.08 -0.48 -0.40 -0.19 +E 0.03 -0.11 0.14 0.34 -0.53 0.25 0.38 0.02 0.07 -0.20 -0.34 -0.01 -0.21 -0.54 -0.06 0.00 -0.04 -0.70 -0.43 -0.18 +G 0.13 -0.26 0.03 0.06 -0.34 -0.12 0.02 0.48 -0.21 -0.26 -0.41 -0.17 -0.28 -0.48 -0.05 0.11 0.00 -0.70 -0.52 -0.14 +H -0.14 0.16 0.16 0.07 -0.34 0.29 0.07 -0.21 0.65 -0.24 -0.21 0.00 -0.21 -0.18 -0.02 -0.08 -0.13 -0.28 -0.01 -0.22 +I -0.05 -0.20 -0.18 -0.24 -0.23 -0.20 -0.20 -0.26 -0.24 0.45 0.24 -0.19 0.22 0.10 -0.20 -0.14 0.01 -0.51 -0.09 0.37 +L -0.19 -0.30 -0.29 -0.40 -0.60 -0.18 -0.34 -0.41 -0.21 0.24 0.59 -0.29 0.37 0.18 -0.25 -0.28 -0.17 -0.18 -0.09 0.19 +K -0.12 0.34 0.10 0.01 -0.54 0.07 -0.01 -0.17 0.00 -0.19 -0.29 0.47 0.04 -0.53 -0.11 -0.02 0.00 -0.35 -0.44 -0.24 +M -0.11 -0.04 -0.17 -0.26 -0.52 -0.10 -0.21 -0.28 -0.21 0.22 0.37 0.04 0.64 0.02 -0.21 -0.16 -0.06 -0.42 -0.24 0.18 +F -0.35 -0.45 -0.35 -0.56 -0.43 -0.47 -0.54 -0.48 -0.18 0.10 0.18 -0.53 0.02 0.91 -0.46 -0.32 -0.31 0.04 0.70 -0.12 +P 0.11 -0.02 -0.05 -0.10 -0.28 0.02 -0.06 -0.05 -0.02 -0.20 -0.25 -0.11 -0.21 -0.46 0.59 0.09 0.03 -0.56 -0.49 -0.12 +S 0.11 -0.03 0.07 0.03 0.00 -0.05 0.00 0.11 -0.08 -0.14 -0.28 -0.02 -0.16 -0.32 0.09 0.16 0.13 -0.25 -0.28 -0.10 +T 0.12 -0.09 0.04 -0.01 -0.22 -0.08 -0.04 0.00 -0.13 0.01 -0.17 0.00 -0.06 -0.31 0.03 0.13 0.26 -0.52 
-0.27 0.03 +W -0.58 0.22 -0.42 -0.68 -0.78 -0.48 -0.70 -0.70 -0.28 -0.51 -0.18 -0.35 -0.42 0.04 -0.56 -0.25 -0.52 1.73 -0.02 -0.62 +Y -0.35 -0.42 -0.21 -0.43 0.03 -0.40 -0.43 -0.52 -0.01 -0.09 -0.09 -0.44 -0.24 0.70 -0.49 -0.28 -0.27 -0.02 1.01 -0.25 +V 0.02 -0.25 -0.17 -0.21 -0.19 -0.19 -0.18 -0.14 -0.22 0.37 0.19 -0.24 0.18 -0.12 -0.12 -0.10 0.03 -0.62 -0.25 0.43 diff --git a/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 b/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 new file mode 100644 index 0000000..6fb12d2 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/NUC.4.4 @@ -0,0 +1,25 @@ +# +# This matrix was created by Todd Lowe 12/10/92 +# +# Uses ambiguous nucleotide codes, probabilities rounded to +# nearest integer +# +# Lowest score = -4, Highest score = 5 +# + A T G C S W R Y K M B V H D N +A 5 -4 -4 -4 -4 1 1 -4 -4 1 -4 -1 -1 -1 -2 +T -4 5 -4 -4 -4 1 -4 1 1 -4 -1 -4 -1 -1 -2 +G -4 -4 5 -4 1 -4 1 -4 1 -4 -1 -1 -4 -1 -2 +C -4 -4 -4 5 1 -4 -4 1 -4 1 -1 -1 -1 -4 -2 +S -4 -4 1 1 -1 -4 -2 -2 -2 -2 -1 -1 -3 -3 -1 +W 1 1 -4 -4 -4 -1 -2 -2 -2 -2 -3 -3 -1 -1 -1 +R 1 -4 1 -4 -2 -2 -1 -4 -2 -2 -3 -1 -3 -1 -1 +Y -4 1 -4 1 -2 -2 -4 -1 -2 -2 -1 -3 -1 -3 -1 +K -4 1 1 -4 -2 -2 -2 -2 -1 -4 -1 -3 -3 -1 -1 +M 1 -4 -4 1 -2 -2 -2 -2 -4 -1 -3 -1 -1 -3 -1 +B -4 -1 -1 -1 -1 -3 -3 -1 -1 -3 -1 -2 -2 -2 -1 +V -1 -4 -1 -1 -1 -3 -1 -3 -3 -1 -2 -1 -2 -2 -1 +H -1 -1 -4 -1 -3 -1 -3 -1 -3 -1 -2 -2 -1 -2 -1 +D -1 -1 -1 -4 -3 -1 -1 -3 -1 -3 -2 -2 -2 -1 -1 +N -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 + diff --git a/code/lib/Bio/Align/substitution_matrices/data/PAM250 b/code/lib/Bio/Align/substitution_matrices/data/PAM250 new file mode 100644 index 0000000..17e9e60 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM250 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049 +# +# Expected score = -0.844, Entropy = 0.354 bits +# +# Lowest score = -8, 
Highest score = 17 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 +R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 +N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 +D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 +C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 +Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 +E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 +G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 +H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 +I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 +L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 +K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 +M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 +F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 +P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 +S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 +T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 +W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 +Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 +V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 +B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 +Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 +X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/PAM30 b/code/lib/Bio/Align/substitution_matrices/data/PAM30 new file mode 100644 index 0000000..8a01c88 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM30 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 30 
substitution matrix, scale = ln(2)/2 = 0.346574 +# +# Expected score = -5.06, Entropy = 2.57 bits +# +# Lowest score = -17, Highest score = 13 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 6 -7 -4 -3 -6 -4 -2 -2 -7 -5 -6 -7 -5 -8 -2 0 -1 -13 -8 -2 -3 -3 -3 -17 +R -7 8 -6 -10 -8 -2 -9 -9 -2 -5 -8 0 -4 -9 -4 -3 -6 -2 -10 -8 -7 -4 -6 -17 +N -4 -6 8 2 -11 -3 -2 -3 0 -5 -7 -1 -9 -9 -6 0 -2 -8 -4 -8 6 -3 -3 -17 +D -3 -10 2 8 -14 -2 2 -3 -4 -7 -12 -4 -11 -15 -8 -4 -5 -15 -11 -8 6 1 -5 -17 +C -6 -8 -11 -14 10 -14 -14 -9 -7 -6 -15 -14 -13 -13 -8 -3 -8 -15 -4 -6 -12 -14 -9 -17 +Q -4 -2 -3 -2 -14 8 1 -7 1 -8 -5 -3 -4 -13 -3 -5 -5 -13 -12 -7 -3 6 -5 -17 +E -2 -9 -2 2 -14 1 8 -4 -5 -5 -9 -4 -7 -14 -5 -4 -6 -17 -8 -6 1 6 -5 -17 +G -2 -9 -3 -3 -9 -7 -4 6 -9 -11 -10 -7 -8 -9 -6 -2 -6 -15 -14 -5 -3 -5 -5 -17 +H -7 -2 0 -4 -7 1 -5 -9 9 -9 -6 -6 -10 -6 -4 -6 -7 -7 -3 -6 -1 -1 -5 -17 +I -5 -5 -5 -7 -6 -8 -5 -11 -9 8 -1 -6 -1 -2 -8 -7 -2 -14 -6 2 -6 -6 -5 -17 +L -6 -8 -7 -12 -15 -5 -9 -10 -6 -1 7 -8 1 -3 -7 -8 -7 -6 -7 -2 -9 -7 -6 -17 +K -7 0 -1 -4 -14 -3 -4 -7 -6 -6 -8 7 -2 -14 -6 -4 -3 -12 -9 -9 -2 -4 -5 -17 +M -5 -4 -9 -11 -13 -4 -7 -8 -10 -1 1 -2 11 -4 -8 -5 -4 -13 -11 -1 -10 -5 -5 -17 +F -8 -9 -9 -15 -13 -13 -14 -9 -6 -2 -3 -14 -4 9 -10 -6 -9 -4 2 -8 -10 -13 -8 -17 +P -2 -4 -6 -8 -8 -3 -5 -6 -4 -8 -7 -6 -8 -10 8 -2 -4 -14 -13 -6 -7 -4 -5 -17 +S 0 -3 0 -4 -3 -5 -4 -2 -6 -7 -8 -4 -5 -6 -2 6 0 -5 -7 -6 -1 -5 -3 -17 +T -1 -6 -2 -5 -8 -5 -6 -6 -7 -2 -7 -3 -4 -9 -4 0 7 -13 -6 -3 -3 -6 -4 -17 +W -13 -2 -8 -15 -15 -13 -17 -15 -7 -14 -6 -12 -13 -4 -14 -5 -13 13 -5 -15 -10 -14 -11 -17 +Y -8 -10 -4 -11 -4 -12 -8 -14 -3 -6 -7 -9 -11 2 -13 -7 -6 -5 10 -7 -6 -9 -7 -17 +V -2 -8 -8 -8 -6 -7 -6 -5 -6 2 -2 -9 -1 -8 -6 -6 -3 -15 -7 7 -8 -6 -5 -17 +B -3 -7 6 6 -12 -3 1 -3 -1 -6 -9 -2 -10 -10 -7 -1 -3 -10 -6 -8 6 0 -5 -17 +Z -3 -4 -3 1 -14 6 6 -5 -1 -6 -7 -4 -5 -13 -4 -5 -6 -14 -9 -6 0 6 -5 -17 +X -3 -6 -3 -5 -9 -5 -5 -5 -5 -5 -6 -5 -5 -8 -5 -3 -4 -11 -7 -5 -5 -5 -5 -17 +* -17 -17 -17 -17 
-17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 -17 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/PAM70 b/code/lib/Bio/Align/substitution_matrices/data/PAM70 new file mode 100644 index 0000000..b20cdf0 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/PAM70 @@ -0,0 +1,34 @@ +# +# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93] +# +# PAM 70 substitution matrix, scale = ln(2)/2 = 0.346574 +# +# Expected score = -2.77, Entropy = 1.60 bits +# +# Lowest score = -11, Highest score = 13 +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 5 -4 -2 -1 -4 -2 -1 0 -4 -2 -4 -4 -3 -6 0 1 1 -9 -5 -1 -1 -1 -2 -11 +R -4 8 -3 -6 -5 0 -5 -6 0 -3 -6 2 -2 -7 -2 -1 -4 0 -7 -5 -4 -2 -3 -11 +N -2 -3 6 3 -7 -1 0 -1 1 -3 -5 0 -5 -6 -3 1 0 -6 -3 -5 5 -1 -2 -11 +D -1 -6 3 6 -9 0 3 -1 -1 -5 -8 -2 -7 -10 -4 -1 -2 -10 -7 -5 5 2 -3 -11 +C -4 -5 -7 -9 9 -9 -9 -6 -5 -4 -10 -9 -9 -8 -5 -1 -5 -11 -2 -4 -8 -9 -6 -11 +Q -2 0 -1 0 -9 7 2 -4 2 -5 -3 -1 -2 -9 -1 -3 -3 -8 -8 -4 -1 5 -2 -11 +E -1 -5 0 3 -9 2 6 -2 -2 -4 -6 -2 -4 -9 -3 -2 -3 -11 -6 -4 2 5 -3 -11 +G 0 -6 -1 -1 -6 -4 -2 6 -6 -6 -7 -5 -6 -7 -3 0 -3 -10 -9 -3 -1 -3 -3 -11 +H -4 0 1 -1 -5 2 -2 -6 8 -6 -4 -3 -6 -4 -2 -3 -4 -5 -1 -4 0 1 -3 -11 +I -2 -3 -3 -5 -4 -5 -4 -6 -6 7 1 -4 1 0 -5 -4 -1 -9 -4 3 -4 -4 -3 -11 +L -4 -6 -5 -8 -10 -3 -6 -7 -4 1 6 -5 2 -1 -5 -6 -4 -4 -4 0 -6 -4 -4 -11 +K -4 2 0 -2 -9 -1 -2 -5 -3 -4 -5 6 0 -9 -4 -2 -1 -7 -7 -6 -1 -2 -3 -11 +M -3 -2 -5 -7 -9 -2 -4 -6 -6 1 2 0 10 -2 -5 -3 -2 -8 -7 0 -6 -3 -3 -11 +F -6 -7 -6 -10 -8 -9 -9 -7 -4 0 -1 -9 -2 8 -7 -4 -6 -2 4 -5 -7 -9 -5 -11 +P 0 -2 -3 -4 -5 -1 -3 -3 -2 -5 -5 -4 -5 -7 7 0 -2 -9 -9 -3 -4 -2 -3 -11 +S 1 -1 1 -1 -1 -3 -2 0 -3 -4 -6 -2 -3 -4 0 5 2 -3 -5 -3 0 -2 -1 -11 +T 1 -4 0 -2 -5 -3 -3 -3 -4 -1 -4 -1 -2 -6 -2 2 6 -8 -4 -1 -1 -3 -2 -11 +W -9 0 -6 -10 -11 -8 -11 -10 -5 -9 -4 -7 -8 -2 -9 -3 -8 13 -3 -10 -7 -10 -7 -11 +Y -5 -7 -3 -7 -2 -8 -6 -9 -1 -4 -4 -7 -7 4 -9 -5 -4 -3 9 -5 -4 -7 -5 -11 +V -1 -5 -5 -5 -4 
-4 -4 -3 -4 3 0 -6 0 -5 -3 -3 -1 -10 -5 6 -5 -4 -2 -11 +B -1 -4 5 5 -8 -1 2 -1 0 -4 -6 -1 -6 -7 -4 0 -1 -7 -4 -5 5 1 -2 -11 +Z -1 -2 -1 2 -9 5 5 -3 1 -4 -4 -2 -3 -9 -2 -2 -3 -10 -7 -4 1 5 -3 -11 +X -2 -3 -2 -3 -6 -2 -3 -3 -3 -3 -4 -3 -3 -5 -3 -1 -2 -7 -5 -2 -2 -3 -3 -11 +* -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 -11 1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/RAO b/code/lib/Bio/Align/substitution_matrices/data/RAO new file mode 100644 index 0000000..f3ef1c0 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/RAO @@ -0,0 +1,27 @@ +# J.K. Mohana Rao: +# "New scoring matrix for amino acid residue exchanges based on residue +# characteristic physical parameters." +# International Journal of Peptide and Protein Research: 29(2): 276-281 (1987). +# Figure 1, lower triangle. +# PMID 3570667 + A C D E F G H I K L M N P Q R S T V W Y +A 16 11 9 10 10 8 11 9 10 11 11 9 6 11 8 10 10 9 11 9 +C 11 16 8 9 10 8 10 8 9 11 10 9 7 10 8 10 10 8 11 10 +D 9 8 16 11 4 9 9 3 11 6 5 11 8 11 10 10 9 3 6 7 +E 10 9 11 16 6 6 11 4 11 7 8 10 5 11 9 9 8 4 7 6 +F 10 10 4 6 16 7 9 12 6 11 10 6 4 7 5 8 10 11 11 10 +G 8 8 9 6 7 16 7 6 7 6 4 10 11 8 7 11 10 6 8 10 +H 11 10 9 11 9 7 16 8 11 10 10 10 5 11 10 10 10 9 10 9 +I 9 8 3 4 12 6 8 16 4 10 9 5 3 6 4 8 10 12 11 10 +K 10 9 11 11 6 7 11 4 16 7 8 11 6 12 11 10 9 5 7 7 +L 11 11 6 7 11 6 10 10 7 16 11 7 4 9 6 8 9 10 11 9 +M 11 10 5 8 10 4 10 9 8 11 16 6 2 9 6 7 8 9 10 8 +N 9 9 11 10 6 10 10 5 11 7 6 16 9 11 10 11 10 5 8 8 +P 6 7 8 5 4 11 5 3 6 4 2 9 16 7 6 10 8 3 6 8 +Q 11 10 11 11 7 8 11 6 12 9 9 11 7 16 10 10 10 6 9 8 +R 8 8 10 9 5 7 10 4 11 6 6 10 6 10 16 9 9 5 7 7 +S 10 10 10 9 8 11 10 8 10 8 7 11 10 10 9 16 11 8 10 11 +T 10 10 9 8 10 10 10 10 9 9 8 10 8 10 9 11 16 10 11 11 +V 9 8 3 4 11 6 9 12 5 10 9 5 3 6 5 8 10 16 11 10 +W 11 11 6 7 11 8 10 11 7 11 10 8 6 9 7 10 11 11 16 11 +Y 9 10 7 6 10 10 9 10 7 9 8 8 8 8 7 11 11 10 11 16 diff --git 
a/code/lib/Bio/Align/substitution_matrices/data/RISLER b/code/lib/Bio/Align/substitution_matrices/data/RISLER new file mode 100644 index 0000000..438b601 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/RISLER @@ -0,0 +1,27 @@ +# J.L. Risler, M.O. Delorme, H. Delacroix, A. Henaut: +# "Amino acid substitutions in structurally related proteins. A pattern +# recognition approach. Determination of a new and efficient scoring matrix." +# Journal of Molecular Biology 204(4): 1019-1029 (1988). +# Figure 5. +# PMID 3221397 + A C D E F G H I K L M N P Q R S T V W Y +A 2.2 -1.5 0.2 1.7 0.6 0.6 -0.6 1.7 1.4 1.3 1.0 1.3 -0.2 1.8 1.5 2.0 1.9 2.0 -0.9 0.2 +C -1.5 2.2 -1.7 -1.5 -1.6 -1.7 -1.8 -1.6 -1.6 -1.5 -1.6 -1.6 -1.8 -1.4 -1.5 -1.3 -1.4 -1.4 -1.8 -1.1 +D 0.2 -1.7 2.2 1.0 -0.3 -0.4 -1.3 0.0 0.1 -0.2 -0.5 0.8 -1.2 0.6 -0.1 0.7 0.0 0.0 -1.4 -0.4 +E 1.7 -1.5 1.0 2.2 0.6 0.3 -0.6 1.5 1.4 0.9 0.6 1.4 -0.1 2.1 1.9 1.8 1.6 1.6 -1.0 0.2 +F 0.6 -1.6 -0.3 0.6 2.2 -0.4 -1.1 1.0 0.1 1.0 -0.2 0.4 -1.1 0.7 0.4 0.5 0.3 0.8 -0.9 2.0 +G 0.6 -1.7 -0.4 0.3 -0.4 2.2 -1.2 0.0 -0.1 -0.2 -0.4 0.2 -1.2 0.2 0.1 0.7 0.2 0.1 -1.3 -0.2 +H -0.6 -1.8 -1.3 -0.6 -1.1 -1.2 2.2 -0.8 -1.0 -0.9 -1.2 -0.3 -1.6 -0.5 -0.4 -0.4 -0.9 -0.7 -1.7 -0.8 +I 1.7 -1.6 0.0 1.5 1.0 0.0 -0.8 2.2 1.0 2.1 0.9 0.9 -0.6 1.4 1.4 1.6 1.6 2.2 -0.7 0.4 +K 1.4 -1.6 0.1 1.4 0.1 -0.1 -1.0 1.0 2.2 0.7 0.4 1.0 -0.7 1.7 2.1 1.4 1.2 1.2 -1.1 0.5 +L 1.3 -1.5 -0.2 0.9 1.0 -0.2 -0.9 2.1 0.7 2.2 1.8 0.8 -0.8 1.1 1.2 1.3 1.2 2.0 -0.8 0.5 +M 1.0 -1.6 -0.5 0.6 -0.2 -0.4 -1.2 0.9 0.4 1.8 2.2 0.0 -1.2 1.2 1.1 0.6 0.8 0.8 -1.3 -0.2 +N 1.3 -1.6 0.8 1.4 0.4 0.2 -0.3 0.9 1.0 0.8 0.0 2.2 -1.0 1.6 1.2 1.9 1.1 1.1 -1.1 -0.1 +P -0.2 -1.8 -1.2 -0.1 -1.1 -1.2 -1.6 -0.6 -0.7 -0.8 -1.2 -1.0 2.2 -0.6 -0.3 -0.3 -0.5 -0.6 -1.6 -1.2 +Q 1.8 -1.4 0.6 2.1 0.7 0.2 -0.5 1.4 1.7 1.1 1.2 1.6 -0.6 2.2 2.0 1.8 1.7 1.5 -1.0 0.5 +R 1.5 -1.5 -0.1 1.9 0.4 0.1 -0.4 1.4 2.1 1.2 1.1 1.2 -0.3 2.0 2.2 2.0 1.9 1.5 -0.8 0.8 +S 2.0 -1.3 0.7 1.8 0.5 0.7 -0.4 1.6 1.4 1.3 
0.6 1.9 -0.3 1.8 2.0 2.2 2.1 1.8 -0.8 0.4 +T 1.9 -1.4 0.0 1.6 0.3 0.2 -0.9 1.6 1.2 1.2 0.8 1.1 -0.5 1.7 1.9 2.1 2.2 1.6 -1.0 0.3 +V 2.0 -1.4 0.0 1.6 0.8 0.1 -0.7 2.2 1.2 2.0 0.8 1.1 -0.6 1.5 1.5 1.8 1.6 2.2 -0.7 0.3 +W -0.9 -1.8 -1.4 -1.0 -0.9 -1.3 -1.7 -0.7 -1.1 -0.8 -1.3 -1.1 -1.6 -1.0 -0.8 -0.8 -1.0 -0.7 2.2 -0.6 +Y 0.2 -1.1 -0.4 0.2 2.0 -0.2 -0.8 0.4 0.5 0.5 -0.2 -0.1 -1.2 0.5 0.8 0.4 0.3 0.3 -0.6 2.2 diff --git a/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER b/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER new file mode 100644 index 0000000..0384fa9 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/SCHNEIDER @@ -0,0 +1,70 @@ +# Adrian Schneider, Gina M. Cannarozzi, and Gaston H. Gonnet: +# "Empirical codon substitution matrix." +# BMC Bioinformatics 6:134 (2005). +# Additional File 3. +# PMID 15927081 + AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT GGA GGC GGG GGT GTA GTC GTG GTT TAA TAC TAG TAT TCA TCC TCG TCT TGA TGC TGG TGT TTA TTC TTG TTT +AAA 11.6 -2.7 9.7 -1.7 -2.7 -6.4 -3.9 -5.6 5.1 -5.0 3.6 -4.2 -6.3 -13.0 -7.1 -11.5 0.4 -6.0 -1.9 -5.3 -8.5 -11.2 -8.9 -10.8 2.1 0.0 1.4 0.2 -10.2 -13.5 -13.0 -12.5 -2.6 -8.5 -5.0 -8.1 -6.3 -9.9 -7.5 -9.0 -7.1 -10.2 -8.2 -9.2 -8.2 -12.5 -11.1 -11.4 -50.0 -14.8 -50.0 -13.8 -7.3 -10.1 -8.4 -9.1 -50.0 -13.0 -13.5 -12.4 -10.7 -18.1 -11.8 -17.2 +AAC -2.7 13.0 -3.3 10.9 -3.5 -0.4 -3.3 -1.8 -5.4 4.6 -5.5 3.0 -10.2 -7.9 -9.9 -9.6 -5.0 0.5 -5.5 -1.0 -10.3 -8.1 -9.4 -9.6 -8.1 -5.0 -7.3 -6.3 -13.4 -11.3 -14.4 -12.9 -6.3 0.8 -6.4 -1.1 -7.4 -5.0 -6.2 -6.5 -5.6 -1.6 -4.7 -3.0 -10.8 -8.7 -11.9 -10.0 -50.0 -6.2 -50.0 -7.5 -6.3 -4.3 -6.2 -5.4 -50.0 -7.0 -16.3 -8.2 -13.2 -12.3 -13.1 -13.3 +AAG 9.7 -3.3 11.6 -2.8 -4.5 -6.7 -3.1 -6.9 3.3 -5.5 4.8 -5.1 -8.9 -13.2 -5.7 -12.6 -1.5 -6.1 -0.6 -6.1 -10.0 -11.8 -8.6 -11.9 1.2 0.5 2.2 0.1 -11.8 -14.0 -11.9 -13.4 -4.9 -9.1 -3.4 -8.9 -8.0 -10.0 -7.2 -10.0 
-9.1 -10.2 -7.1 -9.9 -10.1 -13.0 -10.6 -12.7 -50.0 -14.9 -50.0 -14.4 -8.9 -10.8 -8.9 -10.4 -50.0 -13.1 -11.8 -13.0 -12.4 -19.4 -11.5 -17.8 +AAT -1.7 10.9 -2.8 12.9 -2.7 -2.2 -2.8 0.2 -4.8 2.9 -5.2 5.2 -9.1 -9.5 -9.0 -7.0 -4.0 -1.0 -5.0 1.0 -9.0 -8.8 -8.9 -7.7 -7.0 -6.8 -7.1 -4.3 -12.4 -12.9 -13.9 -10.0 -4.9 -0.7 -5.6 1.4 -6.0 -6.5 -6.2 -4.8 -5.1 -3.2 -5.0 -1.2 -9.8 -9.9 -11.3 -8.2 -50.0 -7.6 -50.0 -5.1 -5.5 -5.8 -5.9 -4.2 -50.0 -8.3 -15.5 -5.9 -11.6 -14.2 -12.0 -11.6 +ACA -2.7 -3.5 -4.5 -2.7 11.7 9.0 10.6 9.6 -3.2 -0.7 -5.1 -0.4 0.7 -4.6 -0.4 -3.6 -4.8 -8.9 -6.5 -7.8 -1.4 -4.1 -2.9 -3.3 -8.0 -9.1 -8.0 -8.2 -6.0 -9.3 -7.7 -8.5 -6.4 -10.1 -7.5 -8.6 2.8 -0.7 0.9 0.1 -5.9 -7.3 -6.5 -6.5 -0.4 -3.9 -2.6 -3.1 -50.0 -14.6 -50.0 -12.6 2.9 0.1 1.8 1.0 -50.0 -9.2 -13.6 -7.7 -4.8 -12.2 -6.2 -11.0 +ACC -6.4 -0.4 -6.7 -2.2 9.0 12.3 9.8 9.6 -7.1 2.4 -7.2 0.3 -2.9 -1.2 -3.0 -3.3 -7.6 -6.7 -8.1 -7.9 -4.9 -1.9 -4.5 -3.8 -9.5 -7.8 -9.3 -8.9 -9.4 -7.1 -9.0 -9.0 -9.4 -7.5 -9.3 -9.2 -0.7 2.6 0.2 -0.1 -8.0 -4.6 -7.7 -6.1 -3.6 -1.0 -4.1 -3.1 -50.0 -11.6 -50.0 -11.6 0.2 2.3 0.4 0.5 -50.0 -6.5 -15.3 -7.8 -8.4 -9.5 -8.3 -11.1 +ACG -3.9 -3.3 -3.1 -2.8 10.6 9.8 12.2 9.8 -4.9 0.1 -3.6 -0.2 -1.7 -4.1 0.9 -3.5 -5.9 -8.9 -5.6 -8.5 -3.9 -3.9 -2.2 -4.5 -7.3 -7.5 -6.4 -8.5 -7.1 -8.2 -6.7 -7.8 -7.9 -9.1 -7.1 -9.4 0.9 -0.2 2.5 -0.3 -6.9 -6.5 -5.5 -6.8 -1.7 -3.4 -1.7 -2.8 -50.0 -13.0 -50.0 -11.5 1.5 0.7 2.7 0.6 -50.0 -8.9 -12.1 -7.4 -6.1 -11.5 -5.4 -10.4 +ACT -5.6 -1.8 -6.9 0.2 9.6 9.6 9.8 11.6 -6.6 0.9 -7.2 2.5 -2.3 -3.4 -2.3 -0.6 -6.5 -8.2 -8.0 -6.3 -3.7 -3.5 -3.9 -1.6 -9.9 -8.9 -9.6 -7.0 -8.6 -8.9 -8.8 -6.8 -8.6 -8.2 -8.8 -6.8 0.3 0.1 0.2 2.4 -7.4 -6.2 -6.9 -4.4 -2.9 -2.9 -3.5 -0.9 -50.0 -12.2 -50.0 -10.1 1.2 0.6 1.2 2.4 -50.0 -7.6 -16.1 -5.8 -7.2 -10.8 -7.4 -9.0 +AGA 5.1 -5.4 3.3 -4.8 -3.2 -7.1 -4.9 -6.6 13.3 -2.8 11.2 -1.9 -5.7 -12.5 -7.2 -11.6 -0.7 -4.8 -3.1 -4.3 -10.0 -11.6 -8.8 -11.7 10.5 7.7 9.1 8.5 -9.8 -12.7 -11.7 -11.8 -6.3 -11.4 -8.8 -11.0 -7.5 -10.3 -8.3 -9.9 -1.9 -6.7 -4.1 -6.4 -7.9 -12.5 
-11.4 -11.7 -50.0 -14.0 -50.0 -13.2 -8.5 -10.4 -9.1 -9.6 -50.0 -9.8 -7.7 -8.8 -10.3 -17.9 -11.8 -16.0 +AGC -5.0 4.6 -5.5 2.9 -0.7 2.4 0.1 0.9 -2.8 12.8 -2.6 11.0 -8.4 -6.3 -8.3 -7.7 -6.1 -2.9 -6.5 -4.2 -8.5 -5.9 -7.5 -7.7 -5.5 -2.4 -5.5 -4.1 -12.9 -10.3 -12.7 -11.3 -7.4 -3.1 -7.4 -4.9 -4.4 -1.8 -3.5 -3.5 -1.6 3.0 -0.8 0.9 -8.4 -6.3 -9.6 -7.3 -50.0 -9.0 -50.0 -9.7 -2.3 -0.2 -1.2 -1.5 -50.0 -0.7 -13.3 -2.0 -11.8 -11.9 -12.3 -12.7 +AGG 3.6 -5.5 4.8 -5.2 -5.1 -7.2 -3.6 -7.2 11.2 -2.6 13.4 -2.1 -7.7 -12.5 -5.3 -11.9 -2.2 -4.4 -1.9 -4.8 -10.2 -11.5 -8.1 -12.0 9.3 8.2 10.0 8.0 -10.8 -11.9 -10.7 -12.4 -8.2 -11.8 -6.9 -11.5 -8.4 -9.9 -7.1 -10.6 -4.7 -6.8 -1.2 -7.0 -9.8 -11.6 -9.6 -12.1 -50.0 -14.1 -50.0 -12.7 -10.0 -10.8 -8.8 -10.5 -50.0 -9.2 -4.2 -9.3 -11.0 -18.2 -11.1 -16.1 +AGT -4.2 3.0 -5.1 5.2 -0.4 0.3 -0.2 2.5 -1.9 11.0 -2.1 13.2 -7.6 -8.5 -8.0 -5.3 -5.6 -4.5 -6.8 -2.4 -7.8 -7.9 -7.9 -6.2 -5.7 -5.1 -5.4 -2.1 -12.4 -12.3 -13.1 -10.1 -6.6 -4.4 -6.9 -2.6 -3.6 -3.8 -3.7 -2.0 -1.4 0.8 -0.9 3.0 -8.2 -8.1 -9.2 -5.8 -50.0 -10.1 -50.0 -7.4 -1.7 -2.0 -1.5 -0.5 -50.0 -2.1 -12.6 -0.4 -11.6 -13.9 -11.4 -11.1 +ATA -6.3 -10.2 -8.9 -9.1 0.7 -2.9 -1.7 -2.3 -5.7 -8.4 -7.7 -7.6 13.2 9.6 3.5 9.7 -8.7 -12.4 -10.7 -10.7 -7.8 -10.2 -8.8 -9.6 -9.5 -11.9 -10.9 -10.5 2.3 -0.4 -0.2 -0.3 -9.7 -15.5 -11.4 -13.7 -3.0 -6.4 -4.1 -5.4 -9.6 -11.9 -10.5 -11.8 6.2 3.3 3.7 3.6 -50.0 -13.6 -50.0 -11.9 -5.6 -8.8 -7.2 -8.9 -50.0 -12.4 -14.1 -11.6 2.8 -6.4 0.5 -5.2 +ATC -13.0 -7.9 -13.2 -9.5 -4.6 -1.2 -4.1 -3.4 -12.5 -6.3 -12.5 -8.5 9.6 12.7 0.2 10.5 -12.4 -11.6 -13.2 -12.0 -12.4 -10.1 -11.6 -12.0 -15.1 -13.1 -14.3 -13.4 -1.4 1.4 -1.6 -0.5 -14.9 -14.3 -15.2 -16.8 -7.7 -4.7 -6.3 -6.6 -14.6 -10.8 -12.8 -13.0 2.3 6.0 2.5 3.6 -50.0 -11.2 -50.0 -11.9 -10.5 -9.2 -10.5 -10.5 -50.0 -10.3 -16.2 -11.3 -1.7 -3.7 -2.5 -5.4 +ATG -7.1 -9.9 -5.7 -9.0 -0.4 -3.0 0.9 -2.3 -7.2 -8.3 -5.3 -8.0 3.5 0.2 14.3 1.0 -7.3 -11.2 -6.9 -9.7 -8.5 -10.2 -7.7 -9.8 -10.0 -10.4 -8.4 -10.2 1.0 -0.6 1.7 -0.5 -11.1 -14.8 -9.7 -14.5 -3.7 -5.8 
-3.1 -5.6 -10.4 -11.5 -8.2 -11.1 0.6 -1.6 1.6 -1.1 -50.0 -12.4 -50.0 -11.5 -5.5 -8.0 -4.9 -7.2 -50.0 -12.4 -10.3 -11.6 0.7 -6.8 2.4 -6.0 +ATT -11.5 -9.6 -12.6 -7.0 -3.6 -3.3 -3.5 -0.6 -11.6 -7.7 -11.9 -5.3 9.7 10.5 1.0 12.6 -12.0 -12.2 -12.5 -9.7 -11.1 -11.2 -11.9 -9.4 -12.9 -14.3 -14.0 -11.9 -1.0 -0.4 -1.6 1.2 -13.9 -15.4 -13.7 -12.6 -6.2 -6.7 -6.3 -4.4 -13.0 -12.2 -13.0 -10.4 2.9 3.8 2.5 5.8 -50.0 -12.5 -50.0 -10.2 -9.6 -9.7 -9.6 -8.3 -50.0 -11.6 -15.4 -9.4 -1.1 -5.4 -1.6 -3.3 +CAA 0.4 -5.0 -1.5 -4.0 -4.8 -7.6 -5.9 -6.5 -0.7 -6.1 -2.2 -5.6 -8.7 -12.4 -7.3 -12.0 12.8 2.3 10.2 3.0 0.0 -3.4 -0.7 -3.2 2.5 -0.8 0.9 0.2 -3.0 -7.2 -5.7 -6.1 -0.2 -6.6 -1.8 -6.0 -5.3 -8.2 -5.7 -7.6 -6.7 -9.6 -7.5 -9.3 -7.1 -10.5 -9.6 -9.4 -50.0 -8.1 -50.0 -7.2 -4.3 -6.8 -5.7 -6.3 -50.0 -9.5 -9.6 -8.9 -6.2 -12.8 -6.8 -11.8 +CAC -6.0 0.5 -6.1 -1.0 -8.9 -6.7 -8.9 -8.2 -4.8 -2.9 -4.4 -4.5 -12.4 -11.6 -11.2 -12.2 2.3 14.6 1.9 12.9 -5.5 -2.6 -4.1 -4.7 -1.2 3.0 -1.1 1.6 -8.0 -4.7 -8.7 -6.0 -8.2 -4.7 -7.8 -6.7 -10.0 -8.3 -8.9 -9.9 -10.8 -7.1 -10.0 -9.2 -11.6 -10.1 -12.3 -11.9 -50.0 2.2 -50.0 0.9 -7.9 -5.9 -7.4 -6.9 -50.0 -4.6 -11.6 -5.6 -9.7 -5.9 -9.9 -7.3 +CAG -1.9 -5.5 -0.6 -5.0 -6.5 -8.1 -5.6 -8.0 -3.1 -6.5 -1.9 -6.8 -10.7 -13.2 -6.9 -12.5 10.2 1.9 11.9 2.1 -2.3 -4.1 -0.5 -4.6 -0.1 -0.9 2.4 -0.8 -5.1 -6.9 -4.8 -6.7 -2.3 -7.1 -0.8 -7.2 -7.2 -8.0 -5.6 -8.5 -9.4 -9.7 -7.2 -9.8 -9.2 -11.1 -9.5 -10.7 -50.0 -8.7 -50.0 -8.6 -6.1 -7.5 -5.5 -7.7 -50.0 -10.4 -7.7 -10.0 -7.3 -13.6 -6.4 -13.3 +CAT -5.3 -1.0 -6.1 1.0 -7.8 -7.9 -8.5 -6.3 -4.3 -4.2 -4.8 -2.4 -10.7 -12.0 -9.7 -9.7 3.0 12.9 2.1 14.7 -4.9 -3.8 -4.0 -2.4 -1.0 0.8 -1.3 3.4 -7.6 -5.7 -8.1 -3.5 -6.8 -5.8 -7.2 -4.2 -8.9 -10.2 -8.4 -8.2 -9.9 -8.4 -9.7 -6.4 -11.1 -11.1 -11.8 -10.0 -50.0 0.4 -50.0 2.6 -7.2 -7.0 -7.4 -5.1 -50.0 -5.7 -9.8 -3.2 -8.4 -7.4 -8.9 -5.9 +CCA -8.5 -10.3 -10.0 -9.0 -1.4 -4.9 -3.9 -3.7 -10.0 -8.5 -10.2 -7.8 -7.8 -12.4 -8.5 -11.1 0.0 -5.5 -2.3 -4.9 12.6 10.0 11.1 10.5 -6.0 -8.3 -6.9 -7.5 -2.2 -7.4 -5.3 -5.9 -8.9 -12.5 -9.4 -11.6 
-0.7 -3.9 -2.3 -3.0 -9.1 -9.8 -9.2 -9.5 -5.6 -9.0 -8.0 -8.5 -50.0 -15.3 -50.0 -14.1 2.3 -1.4 0.3 -0.5 -50.0 -13.6 -14.3 -11.5 -5.3 -13.7 -6.1 -12.7 +CCC -11.2 -8.1 -11.8 -8.8 -4.1 -1.9 -3.9 -3.5 -11.6 -5.9 -11.5 -7.9 -10.2 -10.1 -10.2 -11.2 -3.4 -2.6 -4.1 -3.8 10.0 13.1 10.7 10.6 -8.1 -5.2 -8.0 -6.7 -6.1 -3.7 -7.5 -5.6 -11.1 -10.3 -10.6 -11.9 -3.2 -1.0 -2.2 -2.8 -10.5 -8.1 -9.4 -9.5 -8.3 -6.9 -9.4 -8.8 -50.0 -11.3 -50.0 -12.8 -0.6 2.2 -0.3 0.1 -50.0 -10.1 -17.5 -11.1 -8.6 -9.9 -8.6 -11.9 +CCG -8.9 -9.4 -8.6 -8.9 -2.9 -4.5 -2.2 -3.9 -8.8 -7.5 -8.1 -7.9 -8.8 -11.6 -7.7 -11.9 -0.7 -4.1 -0.5 -4.0 11.1 10.7 13.2 10.4 -5.7 -5.8 -3.5 -6.1 -3.6 -6.5 -3.3 -5.5 -9.4 -11.0 -8.7 -11.2 -1.8 -2.7 0.7 -3.0 -9.1 -8.2 -7.3 -9.4 -7.2 -8.2 -7.2 -8.2 -50.0 -13.2 -50.0 -13.3 0.5 -0.9 1.8 -0.6 -50.0 -11.4 -11.1 -10.5 -6.1 -12.9 -5.0 -11.9 +CCT -10.8 -9.6 -11.9 -7.7 -3.3 -3.8 -4.5 -1.6 -11.7 -7.7 -12.0 -6.2 -9.6 -12.0 -9.8 -9.4 -3.2 -4.7 -4.6 -2.4 10.5 10.6 10.4 12.6 -8.2 -7.9 -9.0 -4.7 -6.1 -6.0 -7.5 -2.9 -10.9 -12.2 -11.2 -10.9 -2.4 -2.9 -2.7 -0.9 -10.4 -9.7 -10.2 -8.0 -8.4 -9.1 -9.3 -6.5 -50.0 -13.3 -50.0 -11.0 -0.0 -0.4 -0.4 2.3 -50.0 -10.9 -17.2 -8.3 -8.3 -12.7 -7.8 -9.3 +CGA 2.1 -8.1 1.2 -7.0 -8.0 -9.5 -7.3 -9.9 10.5 -5.5 9.3 -5.7 -9.5 -15.1 -10.0 -12.9 2.5 -1.2 -0.1 -1.0 -6.0 -8.1 -5.7 -8.2 13.8 11.3 11.8 12.1 -6.0 -9.1 -9.0 -9.5 -8.9 -13.4 -10.0 -13.1 -9.9 -11.7 -9.8 -12.8 -5.9 -9.4 -6.5 -8.1 -10.7 -14.0 -13.3 -13.9 -50.0 -11.9 -50.0 -9.7 -8.9 -10.8 -9.0 -11.1 -50.0 -6.5 -5.3 -6.3 -10.5 -16.2 -10.4 -15.2 +CGC 0.0 -5.0 0.5 -6.8 -9.1 -7.8 -7.5 -8.9 7.7 -2.4 8.2 -5.1 -11.9 -13.1 -10.4 -14.3 -0.8 3.0 -0.9 0.8 -8.3 -5.2 -5.8 -7.9 11.3 15.0 11.2 12.8 -9.7 -5.3 -9.2 -7.8 -11.7 -10.6 -10.1 -12.2 -11.7 -8.8 -8.9 -11.8 -9.3 -5.0 -8.0 -8.2 -13.6 -11.4 -12.8 -13.0 -50.0 -7.2 -50.0 -8.4 -11.0 -7.7 -8.7 -10.2 -50.0 -1.5 -7.1 -4.9 -11.2 -11.6 -11.6 -15.0 +CGG 1.4 -7.3 2.2 -7.1 -8.0 -9.3 -6.4 -9.6 9.1 -5.5 10.0 -5.4 -10.9 -14.3 -8.4 -14.0 0.9 -1.1 2.4 -1.3 -6.9 -8.0 -3.5 -9.0 11.8 11.2 13.4 11.4 
-7.3 -8.8 -6.5 -8.5 -10.0 -12.5 -7.8 -12.5 -9.6 -11.0 -7.1 -10.7 -7.9 -8.0 -4.3 -9.4 -11.4 -12.3 -10.9 -12.9 -50.0 -11.8 -50.0 -11.2 -9.4 -9.8 -7.3 -10.5 -50.0 -6.2 -2.2 -6.7 -9.8 -15.2 -8.5 -14.9 +CGT 0.2 -6.3 0.1 -4.3 -8.2 -8.9 -8.5 -7.0 8.5 -4.1 8.0 -2.1 -10.5 -13.4 -10.2 -11.9 0.2 1.6 -0.8 3.4 -7.5 -6.7 -6.1 -4.7 12.1 12.8 11.4 14.7 -8.7 -7.4 -9.4 -5.3 -9.9 -11.8 -10.5 -10.3 -9.9 -10.1 -9.3 -9.3 -8.1 -7.2 -8.0 -5.2 -12.0 -12.1 -12.6 -10.9 -50.0 -7.9 -50.0 -5.3 -9.3 -8.8 -8.9 -7.0 -50.0 -3.6 -7.1 -1.2 -10.2 -12.7 -9.9 -11.0 +CTA -10.2 -13.4 -11.8 -12.4 -6.0 -9.4 -7.1 -8.6 -9.8 -12.9 -10.8 -12.4 2.3 -1.4 1.0 -1.0 -3.0 -8.0 -5.1 -7.6 -2.2 -6.1 -3.6 -6.1 -6.0 -9.7 -7.3 -8.7 11.2 7.9 8.9 8.1 -12.1 -17.4 -13.2 -16.8 -6.3 -9.5 -7.5 -8.8 -12.7 -15.2 -13.1 -14.2 0.0 -3.1 -1.8 -3.1 -50.0 -10.7 -50.0 -9.3 -4.6 -8.9 -5.9 -8.3 -50.0 -12.8 -9.6 -10.6 9.5 -3.2 8.2 -2.8 +CTC -13.5 -11.3 -14.0 -12.9 -9.3 -7.1 -8.2 -8.9 -12.7 -10.3 -11.9 -12.3 -0.4 1.4 -0.6 -0.4 -7.2 -4.7 -6.9 -5.7 -7.4 -3.7 -6.5 -6.0 -9.1 -5.3 -8.8 -7.4 7.9 11.9 7.8 9.3 -15.2 -15.1 -14.6 -17.2 -9.7 -7.3 -8.2 -9.2 -15.0 -12.8 -14.1 -14.9 -2.8 0.2 -2.9 -2.3 -50.0 -7.4 -50.0 -8.7 -9.4 -7.3 -9.4 -9.4 -50.0 -8.0 -11.9 -9.7 6.5 0.5 6.4 -1.8 +CTG -13.0 -14.4 -11.9 -13.9 -7.7 -9.0 -6.7 -8.8 -11.7 -12.7 -10.7 -13.1 -0.2 -1.6 1.7 -1.6 -5.7 -8.7 -4.8 -8.1 -5.3 -7.5 -3.3 -7.5 -9.0 -9.2 -6.5 -9.4 8.9 7.8 10.1 7.8 -14.3 -17.7 -13.0 -17.8 -8.0 -9.4 -6.2 -9.2 -15.1 -14.9 -12.2 -14.9 -1.8 -3.2 -0.8 -3.2 -50.0 -10.8 -50.0 -9.9 -7.3 -9.6 -6.4 -9.4 -50.0 -12.3 -8.7 -11.3 7.6 -3.3 8.7 -2.9 +CTT -12.5 -12.9 -13.4 -10.0 -8.5 -9.0 -7.8 -6.8 -11.8 -11.3 -12.4 -10.1 -0.3 -0.5 -0.5 1.2 -6.1 -6.0 -6.7 -3.5 -5.9 -5.6 -5.5 -2.9 -9.5 -7.8 -8.5 -5.3 8.1 9.3 7.8 11.8 -14.4 -15.7 -14.5 -14.6 -8.4 -8.7 -7.9 -7.1 -14.3 -13.8 -13.8 -12.3 -2.4 -2.2 -2.9 -0.2 -50.0 -8.9 -50.0 -7.1 -8.1 -9.1 -8.7 -6.6 -50.0 -9.7 -11.4 -8.0 6.8 -1.8 6.9 0.4 +GAA -2.6 -6.3 -4.9 -4.9 -6.4 -9.4 -7.9 -8.6 -6.3 -7.4 -8.2 -6.6 -9.7 -14.9 -11.1 -13.9 -0.2 -8.2 -2.3 -6.8 -8.9 
-11.1 -9.4 -10.9 -8.9 -11.7 -10.0 -9.9 -12.1 -15.2 -14.3 -14.4 11.1 2.9 9.2 3.5 -3.0 -7.0 -4.3 -6.2 -2.2 -6.6 -3.7 -5.7 -6.0 -10.4 -8.6 -9.5 -50.0 -15.5 -50.0 -13.9 -7.8 -10.5 -8.7 -9.6 -50.0 -16.5 -17.1 -14.8 -12.1 -18.9 -12.4 -17.4 +GAC -8.5 0.8 -9.1 -0.7 -10.1 -7.5 -9.1 -8.2 -11.4 -3.1 -11.8 -4.4 -15.5 -14.3 -14.8 -15.4 -6.6 -4.7 -7.1 -5.8 -12.5 -10.3 -11.0 -12.2 -13.4 -10.6 -12.5 -11.8 -17.4 -15.1 -17.7 -15.7 2.9 12.5 3.1 10.3 -7.7 -5.2 -6.2 -7.3 -5.4 -1.7 -5.4 -3.7 -11.6 -9.1 -13.0 -10.8 -50.0 -10.3 -50.0 -11.2 -9.8 -8.3 -9.1 -9.7 -50.0 -12.7 -20.6 -13.1 -16.0 -16.6 -17.0 -18.4 +GAG -5.0 -6.4 -3.4 -5.6 -7.5 -9.3 -7.1 -8.8 -8.8 -7.4 -6.9 -6.9 -11.4 -15.2 -9.7 -13.7 -1.8 -7.8 -0.8 -7.2 -9.4 -10.6 -8.7 -11.2 -10.0 -10.1 -7.8 -10.5 -13.2 -14.6 -13.0 -14.5 9.2 3.1 10.8 3.0 -4.5 -6.7 -2.7 -6.7 -5.0 -6.8 -2.5 -6.7 -7.5 -10.4 -7.4 -10.2 -50.0 -15.5 -50.0 -14.2 -8.9 -10.2 -8.5 -10.0 -50.0 -15.6 -15.3 -15.5 -13.0 -18.8 -12.6 -18.3 +GAT -8.1 -1.1 -8.9 1.4 -8.6 -9.2 -9.4 -6.8 -11.0 -4.9 -11.5 -2.6 -13.7 -16.8 -14.5 -12.6 -6.0 -6.7 -7.2 -4.2 -11.6 -11.9 -11.2 -10.9 -13.1 -12.2 -12.5 -10.3 -16.8 -17.2 -17.8 -14.6 3.5 10.3 3.0 12.4 -6.9 -7.5 -6.8 -4.9 -5.4 -4.2 -5.4 -1.3 -10.8 -11.5 -12.4 -8.2 -50.0 -12.5 -50.0 -8.9 -9.2 -10.0 -9.4 -7.9 -50.0 -14.3 -19.9 -11.4 -16.2 -19.2 -15.8 -15.9 +GCA -6.3 -7.4 -8.0 -6.0 2.8 -0.7 0.9 0.3 -7.5 -4.4 -8.4 -3.6 -3.0 -7.7 -3.7 -6.2 -5.3 -10.0 -7.2 -8.9 -0.7 -3.2 -1.8 -2.4 -9.9 -11.7 -9.6 -9.9 -6.3 -9.7 -8.0 -8.4 -3.0 -7.7 -4.5 -6.9 11.3 8.2 9.4 9.1 -1.0 -3.3 -1.8 -2.4 1.5 -2.1 -0.4 -1.1 -50.0 -14.5 -50.0 -12.9 2.8 -0.0 1.4 0.7 -50.0 -8.7 -14.1 -7.5 -5.2 -12.5 -6.5 -10.7 +GCC -9.9 -5.0 -10.0 -6.5 -0.7 2.6 -0.2 0.1 -10.3 -1.8 -9.9 -3.8 -6.4 -4.7 -5.8 -6.7 -8.2 -8.3 -8.0 -10.2 -3.9 -1.0 -2.7 -2.9 -11.7 -8.8 -11.0 -10.1 -9.5 -7.3 -9.4 -8.7 -7.0 -5.2 -6.7 -7.5 8.2 11.6 8.8 9.1 -3.7 -0.9 -3.1 -3.0 -2.0 1.2 -2.3 -1.3 -50.0 -12.1 -50.0 -12.6 0.1 2.5 0.7 0.6 -50.0 -6.0 -14.8 -7.5 -8.6 -9.4 -8.5 -11.2 +GCG -7.5 -6.2 -7.2 -6.2 0.9 0.2 2.5 0.2 -8.3 -3.5 
-7.1 -3.7 -4.1 -6.3 -3.1 -6.3 -5.7 -8.9 -5.6 -8.4 -2.3 -2.2 0.7 -2.7 -9.8 -8.9 -7.1 -9.3 -7.5 -8.2 -6.2 -7.9 -4.3 -6.2 -2.7 -6.8 9.4 8.8 12.1 8.8 -2.1 -2.0 0.1 -2.4 0.0 -1.1 1.3 -0.8 -50.0 -11.9 -50.0 -12.6 1.2 0.7 3.5 0.8 -50.0 -7.1 -12.4 -7.1 -7.1 -10.6 -5.0 -10.6 +GCT -9.0 -6.5 -10.0 -4.8 0.1 -0.1 -0.3 2.4 -9.9 -3.5 -10.6 -2.0 -5.4 -6.6 -5.6 -4.4 -7.6 -9.9 -8.5 -8.2 -3.0 -2.8 -3.0 -0.9 -12.8 -11.8 -10.7 -9.3 -8.8 -9.2 -9.2 -7.1 -6.2 -7.3 -6.7 -4.9 9.1 9.1 8.8 11.2 -3.1 -3.0 -3.3 -1.0 -1.1 -1.4 -2.0 1.2 -50.0 -12.8 -50.0 -10.9 0.8 0.5 0.7 2.3 -50.0 -7.5 -16.5 -5.9 -7.8 -11.1 -7.8 -9.0 +GGA -7.1 -5.6 -9.1 -5.1 -5.9 -8.0 -6.9 -7.4 -1.9 -1.6 -4.7 -1.4 -9.6 -14.6 -10.4 -13.0 -6.7 -10.8 -9.4 -9.9 -9.1 -10.5 -9.1 -10.4 -5.9 -9.3 -7.9 -8.1 -12.7 -15.0 -15.1 -14.3 -2.2 -5.4 -5.0 -5.4 -1.0 -3.7 -2.1 -3.1 12.8 9.6 11.1 10.1 -4.8 -9.4 -8.1 -8.6 -50.0 -18.3 -50.0 -15.4 -5.9 -7.5 -6.6 -7.1 -50.0 -9.5 -11.7 -8.4 -12.5 -17.3 -13.2 -15.4 +GGC -10.2 -1.6 -10.2 -3.2 -7.3 -4.6 -6.5 -6.2 -6.7 3.0 -6.8 0.8 -11.9 -10.8 -11.5 -12.2 -9.6 -7.1 -9.7 -8.4 -9.8 -8.1 -8.2 -9.7 -9.4 -5.0 -8.0 -7.2 -15.2 -12.8 -14.9 -13.8 -6.6 -1.7 -6.8 -4.2 -3.3 -0.9 -2.0 -3.0 9.6 12.8 9.7 10.5 -8.5 -5.6 -9.3 -7.8 -50.0 -12.7 -50.0 -13.8 -6.8 -5.1 -5.3 -6.5 -50.0 -4.1 -12.8 -5.8 -14.4 -14.1 -13.6 -14.8 +GGG -8.2 -4.7 -7.1 -5.0 -6.5 -7.7 -5.5 -6.9 -4.1 -0.8 -1.2 -0.9 -10.5 -12.8 -8.2 -13.0 -7.5 -10.0 -7.2 -9.7 -9.2 -9.4 -7.3 -10.2 -6.5 -8.0 -4.3 -8.0 -13.1 -14.1 -12.2 -13.8 -3.7 -5.4 -2.5 -5.4 -1.8 -3.1 0.1 -3.3 11.1 9.7 12.9 9.8 -5.5 -8.1 -5.4 -7.8 -50.0 -16.4 -50.0 -14.8 -5.9 -7.1 -4.7 -7.2 -50.0 -8.4 -6.4 -8.1 -12.1 -16.2 -10.3 -15.8 +GGT -9.2 -3.0 -9.9 -1.2 -6.5 -6.1 -6.8 -4.4 -6.4 0.9 -7.0 3.0 -11.8 -13.0 -11.1 -10.4 -9.3 -9.2 -9.8 -6.4 -9.5 -9.5 -9.4 -8.0 -8.1 -8.2 -9.4 -5.2 -14.2 -14.9 -14.9 -12.3 -5.7 -3.7 -6.7 -1.3 -2.4 -3.0 -2.4 -1.0 10.1 10.5 9.8 13.1 -7.2 -8.0 -8.4 -5.0 -50.0 -14.7 -50.0 -11.4 -6.3 -6.7 -5.8 -5.0 -50.0 -6.1 -13.4 -3.6 -13.0 -16.0 -13.7 -13.3 +GTA -8.2 -10.8 -10.1 -9.8 -0.4 -3.6 
-1.7 -2.9 -7.9 -8.4 -9.8 -8.2 6.2 2.3 0.6 2.9 -7.1 -11.6 -9.2 -11.1 -5.6 -8.3 -7.2 -8.4 -10.7 -13.6 -11.4 -12.0 0.0 -2.8 -1.8 -2.4 -6.0 -11.6 -7.5 -10.8 1.5 -2.0 0.0 -1.1 -4.8 -8.5 -5.5 -7.2 11.9 8.6 10.0 9.0 -50.0 -14.2 -50.0 -12.8 -4.1 -7.4 -5.7 -6.5 -50.0 -11.0 -14.3 -9.9 1.4 -7.9 -0.7 -6.8 +GTC -12.5 -8.7 -13.0 -9.9 -3.9 -1.0 -3.4 -2.9 -12.5 -6.3 -11.6 -8.1 3.3 6.0 -1.6 3.8 -10.5 -10.1 -11.1 -11.1 -9.0 -6.9 -8.2 -9.1 -14.0 -11.4 -12.3 -12.1 -3.1 0.2 -3.2 -2.2 -10.4 -9.1 -10.4 -11.5 -2.1 1.2 -1.1 -1.4 -9.4 -5.6 -8.1 -8.0 8.6 12.4 8.6 9.5 -50.0 -10.4 -50.0 -11.6 -7.2 -5.7 -7.5 -7.4 -50.0 -8.6 -15.7 -9.7 -3.0 -3.7 -3.2 -5.9 +GTG -11.1 -11.9 -10.6 -11.3 -2.6 -4.1 -1.7 -3.5 -11.4 -9.6 -9.6 -9.2 3.7 2.5 1.6 2.5 -9.6 -12.3 -9.5 -11.8 -8.0 -9.4 -7.2 -9.3 -13.3 -12.8 -10.9 -12.6 -1.8 -2.9 -0.8 -2.9 -8.6 -13.0 -7.4 -12.4 -0.4 -2.3 1.3 -2.0 -8.1 -9.3 -5.4 -8.4 10.0 8.6 11.4 8.9 -50.0 -13.7 -50.0 -13.2 -6.1 -8.1 -5.5 -7.5 -50.0 -11.3 -12.7 -10.1 -1.6 -7.8 -0.3 -7.2 +GTT -11.4 -10.0 -12.7 -8.2 -3.1 -3.1 -2.8 -0.9 -11.7 -7.3 -12.1 -5.8 3.6 3.6 -1.1 5.8 -9.4 -11.9 -10.7 -10.0 -8.5 -8.8 -8.2 -6.5 -13.9 -13.0 -12.9 -10.9 -3.1 -2.3 -3.2 -0.2 -9.5 -10.8 -10.2 -8.2 -1.1 -1.3 -0.8 1.2 -8.6 -7.8 -7.8 -5.0 9.0 9.5 8.9 12.0 -50.0 -13.0 -50.0 -10.3 -6.2 -7.0 -7.1 -5.4 -50.0 -9.3 -14.0 -8.2 -2.4 -6.3 -2.4 -3.9 +TAA -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 33.3 -50.0 30.6 -50.0 -50.0 -50.0 -50.0 -50.0 29.2 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TAC -14.8 -6.2 -14.9 -7.6 -14.6 -11.6 -13.0 -12.2 -14.0 -9.0 -14.1 -10.1 -13.6 -11.2 -12.4 -12.5 -8.1 2.2 -8.7 0.4 -15.3 -11.3 -13.2 -13.3 -11.9 -7.2 -11.8 -7.9 -10.7 -7.4 -10.8 -8.9 -15.5 -10.3 -15.5 -12.5 -14.5 -12.1 -11.9 -12.8 -18.3 -12.7 -16.4 -14.7 -14.2 -10.4 -13.7 -13.0 -50.0 15.1 -50.0 13.3 
-9.9 -5.8 -8.9 -7.6 -50.0 -1.5 -7.8 -3.0 -8.1 3.6 -9.4 2.0 +TAG -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 30.6 -50.0 35.2 -50.0 -50.0 -50.0 -50.0 -50.0 28.5 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TAT -13.8 -7.5 -14.4 -5.1 -12.6 -11.6 -11.5 -10.1 -13.2 -9.7 -12.7 -7.4 -11.9 -11.9 -11.5 -10.2 -7.2 0.9 -8.6 2.6 -14.1 -12.8 -13.3 -11.0 -9.7 -8.4 -11.2 -5.3 -9.3 -8.7 -9.9 -7.1 -13.9 -11.2 -14.2 -8.9 -12.9 -12.6 -12.6 -10.9 -15.4 -13.8 -14.8 -11.4 -12.8 -11.6 -13.2 -10.3 -50.0 13.3 -50.0 15.2 -8.6 -7.0 -8.4 -4.8 -50.0 -3.0 -7.1 -0.1 -7.3 2.3 -8.7 3.9 +TCA -7.3 -6.3 -8.9 -5.5 2.9 0.2 1.5 1.2 -8.5 -2.3 -10.0 -1.7 -5.6 -10.5 -5.5 -9.6 -4.3 -7.9 -6.1 -7.2 2.3 -0.6 0.5 -0.0 -8.9 -11.0 -9.4 -9.3 -4.6 -9.4 -7.3 -8.1 -7.8 -9.8 -8.9 -9.2 2.8 0.1 1.2 0.8 -5.9 -6.8 -5.9 -6.3 -4.1 -7.2 -6.1 -6.2 -50.0 -9.9 -50.0 -8.6 12.5 9.4 11.0 9.8 -50.0 -4.8 -8.7 -3.5 -0.0 -8.6 -2.1 -7.0 +TCC -10.1 -4.3 -10.8 -5.8 0.1 2.3 0.7 0.6 -10.4 -0.2 -10.8 -2.0 -8.8 -9.2 -8.0 -9.7 -6.8 -5.9 -7.5 -7.0 -1.4 2.2 -0.9 -0.4 -10.8 -7.7 -9.8 -8.8 -8.9 -7.3 -9.6 -9.1 -10.5 -8.3 -10.2 -10.0 -0.0 2.5 0.7 0.5 -7.5 -5.1 -7.1 -6.7 -7.4 -5.7 -8.1 -7.0 -50.0 -5.8 -50.0 -7.0 9.4 12.7 10.1 10.0 -50.0 -1.5 -11.3 -3.3 -5.4 -4.2 -5.6 -6.8 +TCG -8.4 -6.2 -8.9 -5.9 1.8 0.4 2.7 1.2 -9.1 -1.2 -8.8 -1.5 -7.2 -10.5 -4.9 -9.6 -5.7 -7.4 -5.5 -7.4 0.3 -0.3 1.8 -0.4 -9.0 -8.7 -7.3 -8.9 -5.9 -9.4 -6.4 -8.7 -8.7 -9.1 -8.5 -9.4 1.4 0.7 3.5 0.7 -6.6 -5.3 -4.7 -5.8 -5.7 -7.5 -5.5 -7.1 -50.0 -8.9 -50.0 -8.4 11.0 10.1 13.2 10.2 -50.0 -4.2 -6.1 -3.6 -3.5 -8.4 -1.2 -6.8 +TCT -9.1 -5.4 -10.4 -4.2 1.0 0.5 0.6 2.4 -9.6 -1.5 -10.5 -0.5 -8.9 -10.5 -7.2 -8.3 -6.3 -6.9 -7.7 -5.1 -0.5 0.1 -0.6 2.3 -11.1 -10.2 -10.5 -7.0 -8.3 -9.4 -9.4 -6.6 -9.6 -9.7 -10.0 -7.9 0.7 0.6 0.8 2.3 -7.1 -6.5 -7.2 -5.0 -6.5 -7.4 
-7.5 -5.4 -50.0 -7.6 -50.0 -4.8 9.8 10.0 10.2 12.1 -50.0 -3.1 -11.4 -0.7 -4.6 -6.6 -4.9 -3.9 +TGA -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 29.2 -50.0 28.5 -50.0 -50.0 -50.0 -50.0 -50.0 33.3 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 -50.0 +TGC -13.0 -7.0 -13.1 -8.3 -9.2 -6.5 -8.9 -7.6 -9.8 -0.7 -9.2 -2.1 -12.4 -10.3 -12.4 -11.6 -9.5 -4.6 -10.4 -5.7 -13.6 -10.1 -11.4 -10.9 -6.5 -1.5 -6.2 -3.6 -12.8 -8.0 -12.3 -9.7 -16.5 -12.7 -15.6 -14.3 -8.7 -6.0 -7.1 -7.5 -9.5 -4.1 -8.4 -6.1 -11.0 -8.6 -11.3 -9.3 -50.0 -1.5 -50.0 -3.0 -4.8 -1.5 -4.2 -3.1 -50.0 16.4 -5.1 14.2 -10.1 -4.3 -10.0 -5.4 +TGG -13.5 -16.3 -11.8 -15.5 -13.6 -15.3 -12.1 -16.1 -7.7 -13.3 -4.2 -12.6 -14.1 -16.2 -10.3 -15.4 -9.6 -11.6 -7.7 -9.8 -14.3 -17.5 -11.1 -17.2 -5.3 -7.1 -2.2 -7.1 -9.6 -11.9 -8.7 -11.4 -17.1 -20.6 -15.3 -19.9 -14.1 -14.8 -12.4 -16.5 -11.7 -12.8 -6.4 -13.4 -14.3 -15.7 -12.7 -14.0 -50.0 -7.8 -50.0 -7.1 -8.7 -11.3 -6.1 -11.4 -50.0 -5.1 18.6 -4.8 -8.6 -8.1 -4.5 -7.3 +TGT -12.4 -8.2 -13.0 -5.9 -7.7 -7.8 -7.4 -5.8 -8.8 -2.0 -9.3 -0.4 -11.6 -11.3 -11.6 -9.4 -8.9 -5.6 -10.0 -3.2 -11.5 -11.1 -10.5 -8.3 -6.3 -4.9 -6.7 -1.2 -10.6 -9.7 -11.3 -8.0 -14.8 -13.1 -15.5 -11.4 -7.5 -7.5 -7.1 -5.9 -8.4 -5.8 -8.1 -3.6 -9.9 -9.7 -10.1 -8.2 -50.0 -3.0 -50.0 -0.1 -3.5 -3.3 -3.6 -0.7 -50.0 14.2 -4.8 16.4 -8.9 -5.8 -8.9 -3.4 +TTA -10.7 -13.2 -12.4 -11.6 -4.8 -8.4 -6.1 -7.2 -10.3 -11.8 -11.0 -11.6 2.8 -1.7 0.7 -1.1 -6.2 -9.7 -7.3 -8.4 -5.3 -8.6 -6.1 -8.3 -10.5 -11.2 -9.8 -10.2 9.5 6.5 7.6 6.8 -12.1 -16.0 -13.0 -16.2 -5.2 -8.6 -7.1 -7.8 -12.5 -14.4 -12.1 -13.0 1.4 -3.0 -1.6 -2.4 -50.0 -8.1 -50.0 -7.3 -0.0 -5.4 -3.5 -4.6 -50.0 -10.1 -8.6 -8.9 13.2 -0.9 9.7 -0.3 +TTC -18.1 -12.3 -19.4 -14.2 -12.2 -9.5 -11.5 -10.8 -17.9 -11.9 -18.2 -13.9 -6.4 -3.7 -6.8 -5.4 -12.8 -5.9 -13.6 -7.4 
-13.7 -9.9 -12.9 -12.7 -16.2 -11.6 -15.2 -12.7 -3.2 0.5 -3.3 -1.8 -18.9 -16.6 -18.8 -19.2 -12.5 -9.4 -10.6 -11.1 -17.3 -14.1 -16.2 -16.0 -7.9 -3.7 -7.8 -6.3 -50.0 3.6 -50.0 2.3 -8.6 -4.2 -8.4 -6.6 -50.0 -4.3 -8.1 -5.8 -0.9 14.2 -1.8 11.6 +TTG -11.8 -13.1 -11.5 -12.0 -6.2 -8.3 -5.4 -7.4 -11.8 -12.3 -11.1 -11.4 0.5 -2.5 2.4 -1.6 -6.8 -9.9 -6.4 -8.9 -6.1 -8.6 -5.0 -7.8 -10.4 -11.6 -8.5 -9.9 8.2 6.4 8.7 6.9 -12.4 -17.0 -12.6 -15.8 -6.5 -8.5 -5.0 -7.8 -13.2 -13.6 -10.3 -13.7 -0.7 -3.2 -0.3 -2.4 -50.0 -9.4 -50.0 -8.7 -2.1 -5.6 -1.2 -4.9 -50.0 -10.0 -4.5 -8.9 9.7 -1.8 11.3 -0.9 +TTT -17.2 -13.3 -17.8 -11.6 -11.0 -11.1 -10.4 -9.0 -16.0 -12.7 -16.1 -11.1 -5.2 -5.4 -6.0 -3.3 -11.8 -7.3 -13.3 -5.9 -12.7 -11.9 -11.9 -9.3 -15.2 -15.0 -14.9 -11.0 -2.8 -1.8 -2.9 0.4 -17.4 -18.4 -18.3 -15.9 -10.7 -11.2 -10.6 -9.0 -15.4 -14.8 -15.8 -13.3 -6.8 -5.9 -7.2 -3.9 -50.0 2.0 -50.0 3.9 -7.0 -6.8 -6.8 -3.9 -50.0 -5.4 -7.3 -3.4 -0.3 11.6 -0.9 14.1 diff --git a/code/lib/Bio/Align/substitution_matrices/data/STR b/code/lib/Bio/Align/substitution_matrices/data/STR new file mode 100644 index 0000000..23189c3 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/STR @@ -0,0 +1,26 @@ +# Steven Henikoff and Jorja G. Henikoff: +# "Performance evaluation of amino acid substitution matrices." +# Proteins: Structure, Function, and Genetics: 17(1): 49-61 (1993). +# Figure 1, lower triangle. 
+# PMID 8234244 + A C D E F G H I K L M N P Q R S T V W Y +A 4 -2 -1 0 -3 0 -2 -2 -1 -2 0 -1 -1 0 -1 0 -1 0 -3 -3 +C -2 11 -7 -3 -2 -6 -6 -4 -4 -6 -5 -6 -8 -3 -2 -4 -5 -4 -6 -6 +D -1 -7 6 2 -5 -1 0 -3 -1 -6 -4 2 -1 0 -2 0 -1 -4 -6 -3 +E 0 -3 2 5 -4 -2 -2 -3 1 -4 -2 0 -1 2 0 -1 0 -2 -6 -2 +F -3 -2 -5 -4 7 -6 -2 1 -3 2 0 -3 -5 -4 -4 -3 -3 -1 2 3 +G 0 -6 -1 -2 -6 5 -3 -5 -3 -5 -4 -1 -2 -2 -2 -1 -3 -4 -4 -3 +H -2 -6 0 -2 -2 -3 8 -5 0 -3 -2 2 -3 0 0 -2 -2 -2 -3 0 +I -2 -4 -3 -3 1 -5 -5 6 -3 2 1 -3 -4 -5 -3 -3 -2 2 -2 -1 +K -1 -4 -1 1 -3 -3 0 -3 5 -2 -1 0 -1 1 2 -1 0 -3 -3 -2 +L -2 -6 -6 -4 2 -5 -3 2 -2 5 3 -3 -3 -3 -3 -4 -3 1 -1 -2 +M 0 -5 -4 -2 0 -4 -2 1 -1 3 8 -2 -6 1 -4 -4 -2 0 -2 -1 +N -1 -6 2 0 -3 -1 2 -3 0 -3 -2 5 -2 0 -1 0 0 -4 -5 -1 +P -1 -8 -1 -1 -5 -2 -3 -4 -1 -3 -6 -2 7 -2 -2 -1 -1 -4 -4 -6 +Q 0 -3 0 2 -4 -2 0 -5 1 -3 1 0 -2 6 1 -1 0 -2 -5 -3 +R -1 -2 -2 0 -4 -2 0 -3 2 -3 -4 -1 -2 1 7 0 -1 -3 -2 -1 +S 0 -4 0 -1 -3 -1 -2 -3 -1 -4 -4 0 -1 -1 0 4 1 -3 -5 -2 +T -1 -5 -1 0 -3 -3 -2 -2 0 -3 -2 0 -1 0 -1 1 5 -1 -5 -2 +V 0 -4 -4 -2 -1 -4 -2 2 -3 1 0 -4 -4 -2 -3 -3 -1 5 -4 -1 +W -3 -6 -6 -6 2 -4 -3 -2 -3 -1 -2 -5 -4 -5 -2 -5 -5 -4 10 2 +Y -3 -6 -3 -2 3 -3 0 -1 -2 -2 -1 -1 -6 -3 -1 -2 -2 -1 2 7 diff --git a/code/lib/Bio/Align/substitution_matrices/data/TRANS b/code/lib/Bio/Align/substitution_matrices/data/TRANS new file mode 100644 index 0000000..611e6b9 --- /dev/null +++ b/code/lib/Bio/Align/substitution_matrices/data/TRANS @@ -0,0 +1,12 @@ +# David Wheeler, +# Department of Cell Biology, Baylor College of Medicine, Houston, Texas: +# "Weight matrices for sequence similarity scoring." +# Version 2.0, May 1996. +# David Wheeler defined the Transition/Transversion Matrix as a penalty +# matrix; the matrix below is a similarity matrix where +# similarity = 5 - penalty. 
+ A T C G +A 5 0 0 4 +T 0 5 4 0 +C 0 4 5 0 +G 4 0 0 5 diff --git a/code/lib/Bio/AlignIO/ClustalIO.py b/code/lib/Bio/AlignIO/ClustalIO.py new file mode 100644 index 0000000..49fc51a --- /dev/null +++ b/code/lib/Bio/AlignIO/ClustalIO.py @@ -0,0 +1,305 @@ +# Copyright 2006-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "clustal" output from CLUSTAL W and other tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). +""" +from Bio.Align import MultipleSeqAlignment +from Bio.AlignIO.Interfaces import AlignmentIterator +from Bio.AlignIO.Interfaces import SequentialAlignmentWriter +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +class ClustalWriter(SequentialAlignmentWriter): + """Clustalw alignment writer.""" + + def write_alignment(self, alignment): + """Use this to write (another) single alignment to an open file.""" + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + if alignment.get_alignment_length() == 0: + # This doubles as a check for an alignment object + raise ValueError("Non-empty sequences are required") + + # Old versions of the parser in Bio.Clustalw used a ._version property + try: + version = str(alignment._version) + except AttributeError: + version = "" + if not version: + version = "1.81" + if version.startswith("2."): + # e.g. 2.0.x + output = "CLUSTAL %s multiple sequence alignment\n\n\n" % version + else: + # e.g. 
1.81 or 1.83 + output = "CLUSTAL X (%s) multiple sequence alignment\n\n\n" % version + + cur_char = 0 + max_length = len(alignment[0]) + + if max_length <= 0: + raise ValueError("Non-empty sequences are required") + + if "clustal_consensus" in alignment.column_annotations: + star_info = alignment.column_annotations["clustal_consensus"] + else: + try: + # This was originally stored by Bio.Clustalw as ._star_info + star_info = alignment._star_info + except AttributeError: + star_info = None + + # keep displaying sequences until we reach the end + while cur_char != max_length: + # calculate the number of sequences to show, which will + # be less if we are at the end of the sequence + if (cur_char + 50) > max_length: + show_num = max_length - cur_char + else: + show_num = 50 + + # go through all of the records and print out the sequences + # when we output, we do a nice 80 column output, although this + # may result in truncation of the ids. + for record in alignment: + # Make sure we don't get any spaces in the record + # identifier when output in the file by replacing + # them with underscores: + line = record.id[0:30].replace(" ", "_").ljust(36) + line += str(record.seq[cur_char : (cur_char + show_num)]) + output += line + "\n" + + # now we need to print out the star info, if we've got it + if star_info: + output += ( + (" " * 36) + star_info[cur_char : (cur_char + show_num)] + "\n" + ) + + output += "\n" + cur_char += show_num + + # Want a trailing blank new line in case the output is concatenated + self.handle.write(output + "\n") + + +class ClustalIterator(AlignmentIterator): + """Clustalw alignment iterator.""" + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. 
+ line = self._header + self._header = None + + if not line: + raise StopIteration + + # Whitelisted headers we know about + known_headers = ["CLUSTAL", "PROBCONS", "MUSCLE", "MSAPROBS", "Kalign"] + if line.strip().split()[0] not in known_headers: + raise ValueError( + "%s is not a known CLUSTAL header: %s" + % (line.strip().split()[0], ", ".join(known_headers)) + ) + + # find the clustal version in the header line + version = None + for word in line.split(): + if word[0] == "(" and word[-1] == ")": + word = word[1:-1] + if word[0] in "0123456789": + version = word + break + + # There should be two blank lines after the header line + line = handle.readline() + while line.strip() == "": + line = handle.readline() + + # If the alignment contains entries with the same sequence + # identifier (not a good idea - but seems possible), then this + # dictionary based parser will merge their sequences. Fix this? + ids = [] + seqs = [] + consensus = "" + seq_cols = None # Used to extract the consensus + + # Use the first block to get the sequence identifiers + while True: + if line[0] != " " and line.strip() != "": + # Sequences identifier... + fields = line.rstrip().split() + + # We expect there to be two fields, there can be an optional + # "sequence number" field containing the letter count. + if len(fields) < 2 or len(fields) > 3: + raise ValueError("Could not parse line:\n%s" % line) + + ids.append(fields[0]) + seqs.append(fields[1]) + + # Record the sequence position to get the consensus + if seq_cols is None: + start = len(fields[0]) + line[len(fields[0]) :].find(fields[1]) + end = start + len(fields[1]) + seq_cols = slice(start, end) + del start, end + assert fields[1] == line[seq_cols] + + if len(fields) == 3: + # This MAY be an old style file with a letter count... 
+ try: + letters = int(fields[2]) + except ValueError: + raise ValueError( + "Could not parse line, bad sequence number:\n%s" % line + ) from None + if len(fields[1].replace("-", "")) != letters: + raise ValueError( + "Could not parse line, invalid sequence number:\n%s" % line + ) + elif line[0] == " ": + # Sequence consensus line... + assert len(ids) == len(seqs) + assert len(ids) > 0 + assert seq_cols is not None + consensus = line[seq_cols] + assert not line[: seq_cols.start].strip() + assert not line[seq_cols.stop :].strip() + # Check for blank line (or end of file) + line = handle.readline() + assert line.strip() == "" + break + else: + # No consensus + break + line = handle.readline() + if not line: + break # end of file + + assert line.strip() == "" + assert seq_cols is not None + + # Confirm all same length + for s in seqs: + assert len(s) == len(seqs[0]) + if consensus: + assert len(consensus) == len(seqs[0]) + + # Loop over any remaining blocks... + done = False + while not done: + # There should be a blank line between each block. + # Also want to ignore any consensus line from the + # previous block. + while (not line) or line.strip() == "": + line = handle.readline() + if not line: + break # end of file + if not line: + break # end of file + + if line.split(None, 1)[0] in known_headers: + # Found concatenated alignment. + self._header = line + break + + for i in range(len(ids)): + if line[0] == " ": + raise ValueError("Unexpected line:\n%r" % line) + fields = line.rstrip().split() + + # We expect there to be two fields, there can be an optional + # "sequence number" field containing the letter count. + if len(fields) < 2 or len(fields) > 3: + raise ValueError("Could not parse line:\n%r" % line) + + if fields[0] != ids[i]: + raise ValueError( + "Identifiers out of order? 
Got '%s' but expected '%s'" + % (fields[0], ids[i]) + ) + + if fields[1] != line[seq_cols]: + start = len(fields[0]) + line[len(fields[0]) :].find(fields[1]) + if start != seq_cols.start: + raise ValueError("Old location %s -> %i:XX" % (seq_cols, start)) + end = start + len(fields[1]) + seq_cols = slice(start, end) + del start, end + + # Append the sequence + seqs[i] += fields[1] + assert len(seqs[i]) == len(seqs[0]) + + if len(fields) == 3: + # This MAY be an old style file with a letter count... + try: + letters = int(fields[2]) + except ValueError: + raise ValueError( + "Could not parse line, bad sequence number:\n%s" % line + ) from None + if len(seqs[i].replace("-", "")) != letters: + raise ValueError( + "Could not parse line, invalid sequence number:\n%s" % line + ) + + # Read in the next line + line = handle.readline() + # There should now be a consensus line + if consensus: + assert line[0] == " " + assert seq_cols is not None + consensus += line[seq_cols] + assert len(consensus) == len(seqs[0]) + assert not line[: seq_cols.start].strip() + assert not line[seq_cols.stop :].strip() + # Read in the next line + line = handle.readline() + + assert len(ids) == len(seqs) + if len(seqs) == 0 or len(seqs[0]) == 0: + raise StopIteration + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != len(ids) + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (len(ids), self.records_per_alignment) + ) + + records = (SeqRecord(Seq(s), id=i, description=i) for (i, s) in zip(ids, seqs)) + alignment = MultipleSeqAlignment(records) + # TODO - Handle alignment annotation better, for now + # mimic the old parser in Bio.Clustalw + if version: + alignment._version = version + if consensus: + alignment_length = len(seqs[0]) + if len(consensus) != alignment_length: + raise ValueError( + "Alignment length is %i, consensus length is %i, '%s'" + % (alignment_length, len(consensus), consensus) + ) + 
alignment.column_annotations["clustal_consensus"] = consensus + # For backward compatibility prior to .column_annotations: + alignment._star_info = consensus + return alignment diff --git a/code/lib/Bio/AlignIO/EmbossIO.py b/code/lib/Bio/AlignIO/EmbossIO.py new file mode 100644 index 0000000..b1ebd4d --- /dev/null +++ b/code/lib/Bio/AlignIO/EmbossIO.py @@ -0,0 +1,219 @@ +# Copyright 2008-2016 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "emboss" alignment output from EMBOSS tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +This module contains a parser for the EMBOSS pairs/simple file format, for +example from the alignret, water and needle tools. +""" +from Bio.Align import MultipleSeqAlignment +from Bio.AlignIO.Interfaces import AlignmentIterator +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +class EmbossIterator(AlignmentIterator): + """Emboss alignment iterator. + + For reading the (pairwise) alignments from EMBOSS tools in what they + call the "pairs" and "simple" formats. + """ + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. 
+ line = self._header + self._header = None + + if not line: + raise StopIteration + + while line.rstrip() != "#=======================================": + line = handle.readline() + if not line: + raise StopIteration + + length_of_seqs = None + number_of_seqs = None + ids = [] + header_dict = {} + + while line[0] == "#": + # Read in the rest of this alignment header, + # try and discover the number of records expected + # and their length + parts = line[1:].split(":", 1) + key = parts[0].lower().strip() + if key == "aligned_sequences": + number_of_seqs = int(parts[1].strip()) + assert len(ids) == 0 + # Should now expect the record identifiers... + for i in range(number_of_seqs): + line = handle.readline() + parts = line[1:].strip().split(":", 1) + assert i + 1 == int(parts[0].strip()) + ids.append(parts[1].strip()) + assert len(ids) == number_of_seqs + if key == "length": + length_of_seqs = int(parts[1].strip()) + + # Parse the rest of the header + if key == "identity": + header_dict["identity"] = int(parts[1].strip().split("/")[0]) + if key == "similarity": + header_dict["similarity"] = int(parts[1].strip().split("/")[0]) + if key == "gaps": + header_dict["gaps"] = int(parts[1].strip().split("/")[0]) + if key == "score": + header_dict["score"] = float(parts[1].strip()) + + # And read in another line... 
+ line = handle.readline() + + if number_of_seqs is None: + raise ValueError("Number of sequences missing!") + if length_of_seqs is None: + raise ValueError("Length of sequences missing!") + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != number_of_seqs + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (number_of_seqs, self.records_per_alignment) + ) + + seqs = [""] * len(ids) + seq_starts = [] + index = 0 + + # Parse the seqs + while line: + if len(line) > 21: + id_start = line[:21].strip().split(None, 1) + seq_end = line[21:].strip().split(None, 1) + if len(id_start) == 2 and len(seq_end) == 2: + # identifier, seq start position, seq, seq end position + # (an aligned seq is broken up into multiple lines) + id, start = id_start + seq, end = seq_end + if start >= end: + # Special case, either a single letter is present, + # or no letters at all. + if seq.replace("-", "") == "": + start = int(start) + end = int(end) + else: + start = int(start) - 1 + end = int(end) + else: + assert seq.replace("-", "") != "", repr(line) + start = int(start) - 1 # python counting + end = int(end) + + if index < 0 or index >= number_of_seqs: + raise ValueError( + "Expected index %i in range [0,%i)" + % (index, number_of_seqs) + ) + # The identifier is truncated... + assert id == ids[index] or id == ids[index][: len(id)] + + if len(seq_starts) == index: + # Record the start + seq_starts.append(start) + + # Check the start... + if start >= end: + assert seq.replace("-", "") == "", line + elif start - seq_starts[index] != len(seqs[index].replace("-", "")): + raise ValueError( + "Found %i chars so far for sequence %i (%s, %r), line says start %i:\n%s" + % ( + len(seqs[index].replace("-", "")), + index, + id, + seqs[index], + start, + line, + ) + ) + seqs[index] += seq + + # Check the end ... 
+ if end != seq_starts[index] + len(seqs[index].replace("-", "")): + raise ValueError( + "Found %i chars so far for sequence %i (%s, %r, start=%i), file says end %i:\n%s" + % ( + len(seqs[index].replace("-", "")), + index, + id, + seqs[index], + seq_starts[index], + end, + line, + ) + ) + + index += 1 + if index >= number_of_seqs: + index = 0 + else: + # just a start value, this is just alignment annotation (?) + # print("Skipping: " + line.rstrip()) + pass + elif line.strip() == "": + # Just a spacer? + pass + else: + raise ValueError("Unrecognised EMBOSS pairwise line: %r\n" % line) + + line = handle.readline() + if ( + line.rstrip() == "#---------------------------------------" + or line.rstrip() == "#=======================================" + ): + # End of alignment + self._header = line + break + + assert index == 0 + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != len(ids) + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (len(ids), self.records_per_alignment) + ) + + records = [] + for id, seq in zip(ids, seqs): + if len(seq) != length_of_seqs: + # EMBOSS 2.9.0 is known to use spaces instead of minus signs + # for leading gaps, and thus fails to parse. This old version + # is still used as of Dec 2008 behind the EBI SOAP webservice: + # http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl + raise ValueError( + "Error parsing alignment - sequences of " + "different length? You could be using an " + "old version of EMBOSS." + ) + records.append(SeqRecord(Seq(seq), id=id, description=id)) + return MultipleSeqAlignment(records, annotations=header_dict) diff --git a/code/lib/Bio/AlignIO/FastaIO.py b/code/lib/Bio/AlignIO/FastaIO.py new file mode 100644 index 0000000..9816253 --- /dev/null +++ b/code/lib/Bio/AlignIO/FastaIO.py @@ -0,0 +1,344 @@ +# Copyright 2008-2016 by Peter Cock. All rights reserved. 
+# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools. + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +This module contains a parser for the pairwise alignments produced by Bill +Pearson's FASTA tools, for use from the Bio.AlignIO interface where it is +referred to as the "fasta-m10" file format (as we only support the machine +readable output format selected with the -m 10 command line option). + +This module does NOT cover the generic "fasta" file format originally +developed as an input format to the FASTA tools. The Bio.AlignIO and +Bio.SeqIO both use the Bio.SeqIO.FastaIO module to deal with these files, +which can also be used to store a multiple sequence alignments. +""" +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +def _extract_alignment_region(alignment_seq_with_flanking, annotation): + """Extract alignment region (PRIVATE). + + Helper function for the main parsing code. + + To get the actual pairwise alignment sequences, we must first + translate the un-gapped sequence based coordinates into positions + in the gapped sequence (which may have a flanking region shown + using leading - characters). To date, I have never seen any + trailing flanking region shown in the m10 file, but the + following code should also cope with that. + + Note that this code seems to work fine even when the "sq_offset" + entries are present as a result of using the -X command line option. 
+ """ + align_stripped = alignment_seq_with_flanking.strip("-") + display_start = int(annotation["al_display_start"]) + if int(annotation["al_start"]) <= int(annotation["al_stop"]): + start = int(annotation["al_start"]) - display_start + end = int(annotation["al_stop"]) - display_start + 1 + else: + # FASTA has flipped this sequence... + start = display_start - int(annotation["al_start"]) + end = display_start - int(annotation["al_stop"]) + 1 + + end += align_stripped.count("-") + if start < 0 or start >= end or end > len(align_stripped): + raise ValueError( + "Problem with sequence start/stop,\n%s[%i:%i]\n%s" + % (alignment_seq_with_flanking, start, end, annotation) + ) + return align_stripped[start:end] + + +def FastaM10Iterator(handle, seq_count=None): + """Alignment iterator for the FASTA tool's pairwise alignment output. + + This is for reading the pairwise alignments output by Bill Pearson's + FASTA program when called with the -m 10 command line option for machine + readable output. For more details about the FASTA tools, see the website + http://fasta.bioch.virginia.edu/ and the paper: + + W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448 + + This class is intended to be used via the Bio.AlignIO.parse() function + by specifying the format as "fasta-m10" as shown in the following code:: + + from Bio import AlignIO + handle = ... + for a in AlignIO.parse(handle, "fasta-m10"): + assert len(a) == 2, "Should be pairwise!" + print("Alignment length %i" % a.get_alignment_length()) + for record in a: + print("%s %s %s" % (record.seq, record.name, record.id)) + + Note that this is not a full blown parser for all the information + in the FASTA output - for example, most of the header and all of the + footer is ignored. Also, the alignments are not batched according to + the input queries. + + Also note that there can be up to about 30 letters of flanking region + included in the raw FASTA output as contextual information. 
This is NOT + part of the alignment itself, and is not included in the resulting + MultipleSeqAlignment objects returned. + """ + state_PREAMBLE = -1 + state_NONE = 0 + state_QUERY_HEADER = 1 + state_ALIGN_HEADER = 2 + state_ALIGN_QUERY = 3 + state_ALIGN_MATCH = 4 + state_ALIGN_CONS = 5 + + def build_hsp(): + if not query_tags and not match_tags: + raise ValueError("No data for query %r, match %r" % (query_id, match_id)) + assert query_tags, query_tags + assert match_tags, match_tags + evalue = align_tags.get("fa_expect") + tool = global_tags.get("tool", "").upper() + + q = _extract_alignment_region(query_seq, query_tags) + if tool in ["TFASTX"] and len(match_seq) == len(q): + m = match_seq + # Quick hack until I can work out how -, * and / characters + # and the apparent mix of aa and bp coordinates works. + else: + m = _extract_alignment_region(match_seq, match_tags) + if len(q) != len(m): + raise ValueError( + f"""\ +Darn... amino acids vs nucleotide coordinates? +tool: {tool} +query_seq: {query_seq} +query_tags: {query_tags} +{q} length: {len(q)} +match_seq: {match_seq} +match_tags: {match_tags} +{m} length: {len(m)} +handle.name: {handle.name} +""" + ) + + annotations = {} + records = [] + + # Want to record both the query header tags, and the alignment tags. + annotations.update(header_tags) + annotations.update(align_tags) + + # Query + # ===== + record = SeqRecord( + Seq(q), + id=query_id, + name="query", + description=query_descr, + annotations={"original_length": int(query_tags["sq_len"])}, + ) + # TODO - handle start/end coordinates properly. Short term hack for now: + record._al_start = int(query_tags["al_start"]) + record._al_stop = int(query_tags["al_stop"]) + + # TODO - Can FASTA output RNA? 
+ if "sq_type" in query_tags: + if query_tags["sq_type"] == "D": + record.annotations["molecule_type"] = "DNA" + elif query_tags["sq_type"] == "p": + record.annotations["molecule_type"] = "protein" + + records.append(record) + + # Match + # ===== + record = SeqRecord( + Seq(m), + id=match_id, + name="match", + description=match_descr, + annotations={"original_length": int(match_tags["sq_len"])}, + ) + # TODO - handle start/end coordinates properly. Short term hack for now: + record._al_start = int(match_tags["al_start"]) + record._al_stop = int(match_tags["al_stop"]) + + if "sq_type" in match_tags: + if match_tags["sq_type"] == "D": + record.annotations["molecule_type"] = "DNA" + elif match_tags["sq_type"] == "p": + record.annotations["molecule_type"] = "protein" + + records.append(record) + + return MultipleSeqAlignment(records, annotations=annotations) + + state = state_PREAMBLE + query_id = None + match_id = None + query_descr = "" + match_descr = "" + global_tags = {} + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + for line in handle: + if ">>>" in line and not line.startswith(">>>"): + if query_id and match_id: + # This happens on old FASTA output which lacked an end of + # query >>><<< marker line. + yield build_hsp() + state = state_NONE + query_descr = line[line.find(">>>") + 3 :].strip() + query_id = query_descr.split(None, 1)[0] + match_id = None + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + elif line.startswith("!! No "): + # e.g. + # !! 
No library sequences with E() < 0.5 + # or on more recent versions, + # No sequences with E() < 0.05 + assert state == state_NONE + assert not header_tags + assert not align_tags + assert not match_tags + assert not query_tags + assert match_id is None + assert not query_seq + assert not match_seq + assert not cons_seq + query_id = None + elif line.strip() in [">>><<<", ">>>///"]: + # End of query, possible end of all queries + if query_id and match_id: + yield build_hsp() + state = state_NONE + query_id = None + match_id = None + header_tags = {} + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + elif line.startswith(">>>"): + # Should be start of a match! + assert query_id is not None + assert line[3:].split(", ", 1)[0] == query_id, line + assert match_id is None + assert not header_tags + assert not align_tags + assert not query_tags + assert not match_tags + assert not match_seq + assert not query_seq + assert not cons_seq + state = state_QUERY_HEADER + elif line.startswith(">>"): + # Should now be at start of a match alignment! + if query_id and match_id: + yield build_hsp() + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + match_descr = line[2:].strip() + match_id = match_descr.split(None, 1)[0] + state = state_ALIGN_HEADER + elif line.startswith(">--"): + # End of one HSP + assert query_id and match_id, line + yield build_hsp() + # Clean up read for next HSP + # but reuse header_tags + align_tags = {} + query_tags = {} + match_tags = {} + query_seq = "" + match_seq = "" + cons_seq = "" + state = state_ALIGN_HEADER + elif line.startswith(">"): + if state == state_ALIGN_HEADER: + # Should be start of query alignment seq... 
+ assert query_id is not None, line + assert match_id is not None, line + assert query_id.startswith(line[1:].split(None, 1)[0]), line + state = state_ALIGN_QUERY + elif state == state_ALIGN_QUERY: + # Should be start of match alignment seq + assert query_id is not None, line + assert match_id is not None, line + assert match_id.startswith(line[1:].split(None, 1)[0]), line + state = state_ALIGN_MATCH + elif state == state_NONE: + # Can get > as the last line of a histogram + pass + else: + raise RuntimeError("state %i got %r" % (state, line)) + elif line.startswith("; al_cons"): + assert state == state_ALIGN_MATCH, line + state = state_ALIGN_CONS + # Next line(s) should be consensus seq... + elif line.startswith("; "): + if ": " in line: + key, value = [s.strip() for s in line[2:].split(": ", 1)] + else: + import warnings + from Bio import BiopythonParserWarning + + # Seen in lalign36, specifically version 36.3.4 Apr, 2011 + # Fixed in version 36.3.5b Oct, 2011(preload8) + warnings.warn( + "Missing colon in line: %r" % line, BiopythonParserWarning + ) + try: + key, value = [s.strip() for s in line[2:].split(" ", 1)] + except ValueError: + raise ValueError("Bad line: %r" % line) from None + if state == state_QUERY_HEADER: + header_tags[key] = value + elif state == state_ALIGN_HEADER: + align_tags[key] = value + elif state == state_ALIGN_QUERY: + query_tags[key] = value + elif state == state_ALIGN_MATCH: + match_tags[key] = value + else: + raise RuntimeError("Unexpected state %r, %r" % (state, line)) + elif state == state_ALIGN_QUERY: + query_seq += line.strip() + elif state == state_ALIGN_MATCH: + match_seq += line.strip() + elif state == state_ALIGN_CONS: + cons_seq += line.strip("\n") + elif state == state_PREAMBLE: + if line.startswith("#"): + global_tags["command"] = line[1:].strip() + elif line.startswith(" version "): + global_tags["version"] = line[9:].strip() + elif " compares a " in line: + global_tags["tool"] = line[: line.find(" compares a ")].strip() + 
class AlignmentIterator:
    """Common base for parsers that yield MultipleSeqAlignment objects.

    Subclasses supply a ``__next__`` method that returns one alignment per
    call; they may also override ``__init__`` when extra set-up is needed.
    """

    def __init__(self, handle, seq_count=None):
        """Store the input handle and the expected record count.

        Arguments:
         - handle - input file
         - seq_count - optional, expected number of records per alignment.
           Recommended for the fasta file format.

        Note when subclassing:
         - keep a single required argument (the handle) followed by the
           optional ``seq_count``, IN THAT ORDER.
         - further optional arguments may be appended.

        """
        # Subclasses typically extend this, e.g. to scan ahead to the first
        # record or to accept additional arguments.
        self.records_per_alignment = seq_count
        self.handle = handle

    def __next__(self):
        """Return the next alignment in the file.

        The base implementation only raises NotImplementedError; a derived
        class must replace it with code that splits the file into individual
        alignments and converts each into a MultipleSeqAlignment.
        """
        raise NotImplementedError("This object should be subclassed")

    def __iter__(self):
        """Iterate over the entries as MultipleSeqAlignment objects.

        Example usage for (concatenated) PHYLIP files::

            with open("many.phy","r") as myFile:
                for alignment in PhylipIterator(myFile):
                    print("New alignment:")
                    for record in alignment:
                        print(record.id)
                        print(record.seq)

        """
        # Two-argument iter(): call __next__ repeatedly, stopping when it
        # returns the sentinel None (or raises StopIteration).
        fetch_next = self.__next__
        return iter(fetch_next, None)
class AlignmentWriter:
    """Common base for MultipleSeqAlignment writers.

    Subclasses are expected to provide the actual output logic by
    overriding ``write_file``; ``__init__`` may be extended as well.
    """

    def __init__(self, handle):
        """Remember the output handle."""
        self.handle = handle

    def write_file(self, alignments):
        """Write a complete file holding the given alignments.

        Arguments:
         - alignments - A list or iterator returning MultipleSeqAlignment objects

        In general, this method can only be called once per file.

        The base implementation only raises NotImplementedError; a derived
        class must replace it and should return the number of alignments
        written.
        """
        raise NotImplementedError("This object should be subclassed")

    def clean(self, text):
        """Return text with every newline / carriage return turned into a space."""
        for line_break in ("\n", "\r"):
            text = text.replace(line_break, " ")
        return text


class SequentialAlignmentWriter(AlignmentWriter):
    """Base for writers whose formats allow alignments to be appended in turn.

    Subclasses must provide ``write_alignment``; ``write_header`` and
    ``write_footer`` are optional hooks, and ``__init__`` may be extended.
    """

    def __init__(self, handle):
        """Remember the output handle."""
        super().__init__(handle)

    def write_file(self, alignments):
        """Write a complete file holding the given alignments.

        Arguments:
         - alignments - A list or iterator returning MultipleSeqAlignment objects

        In general, this method can only be called once per file.

        Returns the number of alignments written.
        """
        self.write_header()
        written = 0
        for written, alignment in enumerate(alignments, start=1):
            self.write_alignment(alignment)
        self.write_footer()
        return written

    def write_header(self):
        """Write any header (hook, does nothing by default).

        A derived class may replace this to emit format-specific content
        before the first alignment.
        """

    def write_footer(self):
        """Write any footer (hook, does nothing by default).

        A derived class may replace this to emit format-specific content
        after the last alignment.
        """

    def write_alignment(self, alignment):
        """Write a single alignment.

        The base implementation only raises NotImplementedError; a derived
        class must replace it with code that writes the alignment to the
        file handle.
        """
        raise NotImplementedError("This object should be subclassed")
+# Please see the LICENSE file that should have been included as part of this +# package. +"""Bio.AlignIO support for the "maf" multiple alignment format. + +The Multiple Alignment Format, described by UCSC, stores a series of +multiple alignments in a single file. It is suitable for whole-genome +to whole-genome alignments; metadata such as source chromosome, start +position, size, and strand can be stored. + +See http://genome.ucsc.edu/FAQ/FAQformat.html#format5 + +You are expected to use this module via the Bio.AlignIO functions (or the +Bio.SeqIO functions if you want to work directly with the gapped sequences). + +Coordinates in the MAF format are defined in terms of zero-based start +positions (like Python) and aligning region sizes. + +A minimal aligned region of length one and starting at first position in the +source sequence would have ``start == 0`` and ``size == 1``. + +As this example shows, ``start + size`` will give one more than the +zero-based end position. We can therefore manipulate ``start`` and +``start + size`` as python list slice boundaries. + +For an inclusive end coordinate, we need to use ``end = start + size - 1``. +A 1-column wide alignment would have ``start == end``. 
class MafWriter(SequentialAlignmentWriter):
    """Writer that serialises MultipleSeqAlignment objects as MAF blocks."""

    def write_header(self):
        """Write the MAF header."""
        for header_line in (
            "##maf version=1 scoring=none\n",
            "# generated by Biopython\n\n",
        ):
            self.handle.write(header_line)

    def _write_record(self, record):
        """Write a single SeqRecord object to an 's' line in a MAF block (PRIVATE)."""
        notes = record.annotations

        # Biopython stores the strand as 1/-1, MAF as +/-.  Anything that is
        # neither is written as "+".
        # TODO: issue warning?
        strand_value = notes.get("strand")
        strand_symbol = (
            "+" if strand_value == 1 else ("-" if strand_value == -1 else "+")
        )

        # In the MAF file format, spaces are not allowed in the id
        source_column = "%-40s" % record.id.replace(" ", "_")
        start_column = "%15s" % notes.get("start", 0)
        # Without an explicit size annotation, fall back to the number of
        # non-gap letters in the sequence.
        size_column = "%5s" % notes.get(
            "size", len(str(record.seq).replace("-", ""))
        )
        src_size_column = "%15s" % notes.get("srcSize", 0)

        s_line = " ".join(
            [
                "s",
                source_column,
                start_column,
                size_column,
                strand_symbol,
                src_size_column,
                str(record.seq),
            ]
        )
        self.handle.write("%s\n" % s_line)

    def write_alignment(self, alignment):
        """Write a complete alignment to a MAF block.

        Writes one MAF block for the MultipleSeqAlignment: an 'a' line,
        followed by one 's' line per SeqRecord and a terminating blank line.
        Returns the number of records written.

        Raises TypeError if ``alignment`` is not a MultipleSeqAlignment and
        ValueError if its sequences differ in length.
        """
        if not isinstance(alignment, MultipleSeqAlignment):
            raise TypeError("Expected an alignment object")

        distinct_lengths = {len(row) for row in alignment}
        if len(distinct_lengths) > 1:
            raise ValueError("Sequences must all be the same length")

        # We allow multiple sequences with the same IDs; for example, there may
        # be a MAF aligning the + and - strands of the same sequence together.

        # for now, use ._annotations private property, but restrict keys to those
        # specifically supported by the MAF format, according to spec
        try:
            block_annotations = alignment._annotations
            supported_pairs = [
                "%s=%s" % (key, value)
                for key, value in block_annotations.items()
                if key in ("score", "pass")
            ]
            a_line_fields = " ".join(supported_pairs)
        except AttributeError:
            # No private annotations on this alignment: emit a default score.
            a_line_fields = "score=0.00"

        self.handle.write("a %s\n" % (a_line_fields,))

        written = 0
        for written, record in enumerate(alignment, start=1):
            self._write_record(record)

        # A blank line terminates the block.
        self.handle.write("\n")

        return written
# Invalid function name according to pylint, but kept for compatibility
# with Bio* conventions.
def MafIterator(handle, seq_count=None):
    """Iterate over a MAF file handle as MultipleSeqAlignment objects.

    Iterates over lines in a MAF file-like object (handle), yielding
    MultipleSeqAlignment objects. SeqRecord IDs generally correspond to
    species names.

    Arguments:
     - handle - iterator over the lines of a MAF file (``next()`` is called
       on it directly)
     - seq_count - optional, expected number of 's' records in every block

    Each yielded alignment carries the key=value pairs of its 'a' line as a
    dictionary in the private ``_annotations`` attribute.

    Raises ValueError on malformed 'a' or 's' lines, on unexpected lines
    inside a block, on a dot/period in the first sequence of a block, and
    when a block does not hold ``seq_count`` records.
    """
    in_a_bundle = False

    annotations = []
    records = []

    while True:
        # allows parsing of the last bundle without duplicating code:
        # at end of input an empty line is simulated, which closes any
        # bundle that is still open and then terminates the loop.
        try:
            line = next(handle)
        except StopIteration:
            line = ""

        if in_a_bundle:
            if line.startswith("s"):
                # add a SeqRecord to the bundle
                line_split = line.strip().split()

                if len(line_split) != 7:
                    raise ValueError(
                        "Error parsing alignment - 's' line must have 7 fields"
                    )

                # convert MAF-style +/- strand to biopython-type 1/-1
                if line_split[4] == "+":
                    strand = 1
                elif line_split[4] == "-":
                    strand = -1
                else:
                    # TODO: issue warning, set to 0?
                    strand = 1

                # s (literal), src (ID), start, size, strand, srcSize, text (sequence)
                anno = {
                    "start": int(line_split[2]),
                    "size": int(line_split[3]),
                    "strand": strand,
                    "srcSize": int(line_split[5]),
                }

                sequence = line_split[6]

                # interpret a dot/period to mean the same as the first sequence
                if "." in sequence:
                    if not records:
                        raise ValueError(
                            "Found dot/period in first sequence of alignment"
                        )

                    ref = records[0].seq
                    # NOTE: zip() stops at the shorter of the two sequences.
                    sequence = "".join(
                        ref_letter if letter == "." else letter
                        for letter, ref_letter in zip(sequence, ref)
                    )

                records.append(
                    SeqRecord(
                        Seq(sequence),
                        id=line_split[1],
                        name=line_split[1],
                        description="",
                        annotations=anno,
                    )
                )
            elif line.startswith("i"):
                # TODO: information about what is in the aligned species DNA before
                # and after the immediately preceding "s" line
                pass
            elif line.startswith("e"):
                # TODO: information about the size of the gap between the alignments
                # that span the current block
                pass
            elif line.startswith("q"):
                # TODO: quality of each aligned base for the species.
                # Need to find documentation on this, looks like ASCII 0-9 or gap?
                # Can then store in each SeqRecord's .letter_annotations dictionary,
                # perhaps as the raw string or turned into integers / None for gap?
                pass
            elif line.startswith("#"):
                # ignore comments
                # (not sure whether comments
                # are in the maf specification, though)
                pass
            elif not line.strip():
                # end a bundle of records
                # Validate with an explicit exception rather than ``assert``,
                # which would be skipped entirely under ``python -O``.
                if seq_count is not None and len(records) != seq_count:
                    raise ValueError(
                        "Found %i records in this alignment, told to expect %i"
                        % (len(records), seq_count)
                    )

                alignment = MultipleSeqAlignment(records)
                # TODO - Introduce an annotated alignment class?
                # See also Bio/AlignIO/FastaIO.py for same requirement.
                # For now, store the annotation a new private property:
                alignment._annotations = annotations

                yield alignment

                in_a_bundle = False

                annotations = []
                records = []
            else:
                raise ValueError(
                    "Error parsing alignment - unexpected line:\n%s" % (line,)
                )
        elif line.startswith("a"):
            # start a bundle of records
            in_a_bundle = True
            annot_strings = line.strip().split()[1:]
            # Every whitespace-separated field must hold exactly one "=".
            if len(annot_strings) != line.count("="):
                raise ValueError("Error parsing alignment - invalid key in 'a' line")
            annotations = dict(a_string.split("=") for a_string in annot_strings)
        elif line.startswith("#"):
            # ignore comments
            pass
        elif not line:
            # genuine end of input (a blank line in the file is "\n", not "")
            break
class MafIndex:
    """Index for a MAF file.

    The index is a sqlite3 database that is built upon creation of the object
    if necessary, and queried when methods *search* or *get_spliced* are
    used.

    The object keeps the MAF file and the SQLite connection open; call
    ``close()`` (or use the object as a context manager) to release them.
    """

    def __init__(self, sqlite_file, maf_file, target_seqname):
        """Indexes or loads the index of a MAF file.

        Arguments:
         - sqlite_file - path of the SQLite index; it is validated if the
           file exists and created from ``maf_file`` otherwise
         - maf_file - path of the MAF file to index
         - target_seqname - ID of the sequence whose coordinates are indexed

        Raises ValueError if an existing index is unusable or the MAF file
        cannot be indexed. The MAF handle and the database connection are
        closed again before such an error propagates.
        """
        self._target_seqname = target_seqname
        # example: Tests/MAF/ucsc_mm9_chr10.mafindex
        self._index_filename = sqlite_file
        # example: /home/bli/src/biopython/Tests/MAF
        self._relative_path = os.path.abspath(os.path.dirname(sqlite_file))
        # example: Tests/MAF/ucsc_mm9_chr10.maf
        self._maf_file = maf_file

        self._con = None
        self._maf_fp = open(self._maf_file)

        try:
            # if sqlite_file exists, use the existing db, otherwise index the
            # file (the check must precede connect(), which creates the file)
            index_exists = os.path.isfile(sqlite_file)
            self._con = dbapi2.connect(sqlite_file)
            if index_exists:
                self._record_count = self.__check_existing_db()
            else:
                self._record_count = self.__make_new_index()
        except Exception:
            # Do not leak the open file handle / connection on failure.
            self.close()
            raise

        # lastly, setup a MafIterator pointing at the open maf_file
        self._mafiter = MafIterator(self._maf_fp)

    def close(self):
        """Close the MAF file handle and the SQLite connection.

        Safe to call more than once. The index cannot be queried afterwards.
        """
        for resource in (
            getattr(self, "_maf_fp", None),
            getattr(self, "_con", None),
        ):
            if resource is not None:
                resource.close()

    def __enter__(self):
        """Return the index itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the index when leaving a ``with`` block."""
        self.close()

    def __meta_value(self, key):
        """Return the meta_data value stored under key (PRIVATE).

        Raises ValueError if the index has no such entry.
        """
        row = self._con.execute(
            "SELECT value FROM meta_data WHERE key = ?", (key,)
        ).fetchone()
        if row is None:
            raise ValueError(
                "Index has no %r entry in its meta data. Corrupt index?" % (key,)
            )
        return row[0]

    def __check_existing_db(self):
        """Perform basic sanity checks upon loading an existing index (PRIVATE).

        Verifies the index version, the MAF file the index was built for,
        the target sequence name and the record count. Returns the number
        of records in the index; raises ValueError on any mismatch or
        SQLite error.
        """
        try:
            idx_version = int(self.__meta_value("version"))
            if idx_version != MAFINDEX_VERSION:
                msg = "\n".join(
                    [
                        "Index version (%s) incompatible with this version "
                        "of MafIndex" % idx_version,
                        "You might erase the existing index %s "
                        "for it to be rebuilt." % self._index_filename,
                    ]
                )
                raise ValueError(msg)

            filename = self.__meta_value("filename")
            # Compute absolute path of the original maf file
            if os.path.isabs(filename):
                # It was already stored as absolute
                tmp_mafpath = filename
            else:
                # It should otherwise have been stored as relative to the index
                # Would be stored with Unix / path separator, so convert
                # it to the local OS path separator here:
                tmp_mafpath = os.path.join(
                    self._relative_path, filename.replace("/", os.path.sep)
                )
            if tmp_mafpath != os.path.abspath(self._maf_file):
                # Original and given absolute paths differ.
                raise ValueError(
                    "Index uses a different file (%s != %s)"
                    % (filename, self._maf_file)
                )

            db_target = self.__meta_value("target_seqname")
            if db_target != self._target_seqname:
                raise ValueError(
                    "Provided database indexed for %s, expected %s"
                    % (db_target, self._target_seqname)
                )

            record_count = int(self.__meta_value("record_count"))
            # -1 is the placeholder written before indexing has finished
            if record_count == -1:
                raise ValueError("Unfinished/partial database provided")

            records_found = int(
                self._con.execute("SELECT COUNT(*) FROM offset_data").fetchone()[0]
            )
            if records_found != record_count:
                raise ValueError(
                    "Expected %s records, found %s.  Corrupt index?"
                    % (record_count, records_found)
                )

            return records_found

        except (dbapi2.OperationalError, dbapi2.DatabaseError) as err:
            raise ValueError("Problem with SQLite database: %s" % err) from None

    def __make_new_index(self):
        """Read MAF file and generate SQLite index (PRIVATE).

        Creates the ``meta_data`` and ``offset_data`` tables, fills them from
        the MAF file and returns the number of indexed records.
        """
        con = self._con
        # All values are bound with "?" placeholders so that sequence names
        # or paths containing quote characters cannot break the statements.
        insert_meta = "INSERT INTO meta_data (key, value) VALUES (?, ?);"

        # make the tables
        con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);")
        con.execute(insert_meta, ("version", str(MAFINDEX_VERSION)))
        # placeholder, replaced by the real count once indexing has finished
        con.execute(insert_meta, ("record_count", "-1"))
        con.execute(insert_meta, ("target_seqname", self._target_seqname))

        # Determine whether to store maf file as relative to the index or absolute
        # See https://github.com/biopython/biopython/pull/381
        both_relative = not os.path.isabs(self._maf_file) and not os.path.isabs(
            self._index_filename
        )
        # Either the user gave both maf file and index as relative paths, or
        # the maf file is in the same directory (or a sub directory) as the
        # index: store the maf file relative to the index.
        if both_relative or (
            os.path.dirname(os.path.abspath(self._maf_file)) + os.path.sep
        ).startswith(self._relative_path + os.path.sep):
            # Note for cross platform use (e.g. shared drive over SAMBA),
            # convert any Windows slash into Unix style for rel paths.
            # example: ucsc_mm9_chr10.maf
            mafpath = os.path.relpath(self._maf_file, self._relative_path).replace(
                os.path.sep, "/"
            )
        else:
            # Default to storing as an absolute path
            # example: /home/bli/src/biopython/Tests/MAF/ucsc_mm9_chr10.maf
            mafpath = os.path.abspath(self._maf_file)
        con.execute(insert_meta, ("filename", mafpath))
        con.execute(
            "CREATE TABLE offset_data (bin INTEGER, start INTEGER, end INTEGER, offset INTEGER);"
        )

        insert_count = 0

        # iterate over the entire file and insert in batches
        mafindex_func = self.__maf_indexer()

        while True:
            batch = list(islice(mafindex_func, 100))
            if not batch:
                break

            # batch is made from self.__maf_indexer(),
            # which yields zero-based "inclusive" start and end coordinates
            con.executemany(
                "INSERT INTO offset_data (bin, start, end, offset) VALUES (?,?,?,?);",
                batch,
            )
            con.commit()
            insert_count += len(batch)

        # then make indexes on the relevant fields
        con.execute("CREATE INDEX IF NOT EXISTS bin_index ON offset_data(bin);")
        con.execute("CREATE INDEX IF NOT EXISTS start_index ON offset_data(start);")
        con.execute("CREATE INDEX IF NOT EXISTS end_index ON offset_data(end);")

        con.execute(
            "UPDATE meta_data SET value = ? WHERE key = 'record_count'",
            (str(insert_count),),
        )

        con.commit()

        return insert_count

    def __maf_indexer(self):
        """Return index information for each bundle (PRIVATE).

        Yields index information for each bundle in the form of
        (bin, start, end, offset) tuples where start and end are
        0-based inclusive coordinates.

        Raises ValueError if a bundle lacks the target sequence or if the
        size on its 's' line disagrees with the number of non-gap letters.
        """
        line = self._maf_fp.readline()

        while line:
            if line.startswith("a"):
                # note the offset
                # NOTE(review): the handle is opened in text mode, so this
                # presumably relies on len(line) matching the number of bytes
                # on disk (ASCII, single-character line endings) - confirm.
                offset = self._maf_fp.tell() - len(line)

                # search the following lines for a match to target_seqname
                while True:
                    line = self._maf_fp.readline()

                    if not line.strip() or line.startswith("a"):
                        # Empty line or new alignment record
                        raise ValueError(
                            "Target for indexing (%s) not found in this bundle"
                            % (self._target_seqname,)
                        )
                    elif line.startswith("s"):
                        # s (literal), src (ID), start, size, strand, srcSize, text (sequence)
                        line_split = line.strip().split()

                        if line_split[1] == self._target_seqname:
                            start = int(line_split[2])
                            size = int(line_split[3])
                            ungapped = len(line_split[6].replace("-", ""))
                            if size != ungapped:
                                raise ValueError(
                                    "Invalid length for target coordinates "
                                    "(expected %s, found %s)" % (size, ungapped)
                                )

                            # "inclusive" end position is start + length - 1
                            end = start + size - 1

                            # _ucscbin takes end-exclusive coordinates
                            yield (self._ucscbin(start, end + 1), start, end, offset)

                            break

            line = self._maf_fp.readline()

    # TODO: check coordinate correctness for the two bin-related static methods
    @staticmethod
    def _region2bin(start, end):
        """Find bins that a region may belong to (PRIVATE).

        Converts a region to a list of bins that it may belong to, including largest
        and smallest bins. The result is returned as a set.
        """
        bins = [0, 1]

        bins.extend(range(1 + (start >> 26), 2 + ((end - 1) >> 26)))
        bins.extend(range(9 + (start >> 23), 10 + ((end - 1) >> 23)))
        bins.extend(range(73 + (start >> 20), 74 + ((end - 1) >> 20)))
        bins.extend(range(585 + (start >> 17), 586 + ((end - 1) >> 17)))

        return set(bins)

    @staticmethod
    def _ucscbin(start, end):
        """Return the smallest bin a given region will fit into (PRIVATE).

        ``end`` is exclusive. Returns 0 when the region fits in none of the
        finer bins.

        Adapted from http://genomewiki.ucsc.edu/index.php/Bin_indexing_system
        """
        bin_offsets = [512 + 64 + 8 + 1, 64 + 8 + 1, 8 + 1, 1, 0]

        _bin_first_shift = 17
        _bin_next_shift = 3

        start_bin = start
        end_bin = end - 1

        start_bin >>= _bin_first_shift
        end_bin >>= _bin_first_shift

        # Try the finest level first, then coarser ones, until both ends of
        # the region land in the same bin.
        for bin_offset in bin_offsets:
            if start_bin == end_bin:
                return bin_offset + start_bin
            start_bin >>= _bin_next_shift
            end_bin >>= _bin_next_shift

        return 0

    def _get_record(self, offset):
        """Retrieve a single MAF record located at the offset provided (PRIVATE)."""
        self._maf_fp.seek(offset)
        return next(self._mafiter)

    def search(self, starts, ends):
        """Search index database for MAF records overlapping ranges provided.

        Returns *MultipleSeqAlignment* results in order by start, then end, then
        internal offset field.

        *starts* should be a list of 0-based start coordinates of segments in the reference.
        *ends* should be the list of the corresponding segment ends
        (in the half-open UCSC convention:
        http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/).

        Raises ValueError if the two lists differ in length, if a segment is
        shorter than one position, or if a fetched block does not match the
        index; raises TypeError for non-integer coordinates.
        """
        # verify the provided exon coordinates
        if len(starts) != len(ends):
            raise ValueError("Every position in starts must have a match in ends")

        # Could it be safer to sort the (exonstart, exonend) pairs?
        for exonstart, exonend in zip(starts, ends):
            exonlen = exonend - exonstart
            if exonlen < 1:
                raise ValueError(
                    "Exon coordinates (%d, %d) invalid: exon length (%d) < 1"
                    % (exonstart, exonend, exonlen)
                )
        con = self._con

        # Keep track of what blocks have already been yielded
        # in order to avoid duplicating them
        # (see https://github.com/biopython/biopython/issues/1083)
        yielded_rec_coords = set()
        # search for every exon
        for exonstart, exonend in zip(starts, ends):
            try:
                possible_bins = ", ".join(
                    map(str, self._region2bin(exonstart, exonend))
                )
            except TypeError:
                raise TypeError(
                    "Exon coordinates must be integers "
                    "(start=%d, end=%d)" % (exonstart, exonend)
                ) from None

            # https://www.sqlite.org/lang_expr.html
            # -----
            # The BETWEEN operator
            #
            # The BETWEEN operator is logically equivalent to a pair of
            # comparisons. "x BETWEEN y AND z" is equivalent to "x>=y AND x<=z"
            # except that with BETWEEN, the x expression is only evaluated
            # once. The precedence of the BETWEEN operator is the same as the
            # precedence as operators == and != and LIKE and groups left to
            # right.
            # -----

            # We are testing overlap between the query segment and records in
            # the index, using non-strict coordinates comparisons.
            # The query segment end must be passed as end-inclusive
            # The index should also have been build with end-inclusive
            # end coordinates.
            # See https://github.com/biopython/biopython/pull/1086#issuecomment-285069073

            # The interpolated values are integers at this point: the bit
            # shifts in _region2bin above reject anything else.
            result = con.execute(
                "SELECT DISTINCT start, end, offset FROM offset_data "
                "WHERE bin IN (%s) "
                "AND (end BETWEEN %s AND %s OR %s BETWEEN start AND end) "
                "ORDER BY start, end, offset ASC;"
                % (possible_bins, exonstart, exonend - 1, exonend - 1)
            )

            rows = result.fetchall()

            # rows come from the sqlite index,
            # which should have been written using __make_new_index,
            # so rec_start and rec_end should be zero-based "inclusive" coordinates
            for rec_start, rec_end, offset in rows:
                # Avoid yielding multiple time the same block
                if (rec_start, rec_end) in yielded_rec_coords:
                    continue
                else:
                    yielded_rec_coords.add((rec_start, rec_end))
                # Iterate through hits, fetching alignments from the MAF file
                # and checking to be sure we've retrieved the expected record.

                fetched = self._get_record(int(offset))

                for record in fetched:
                    if record.id == self._target_seqname:
                        # start and size come from the maf lines
                        start = record.annotations["start"]
                        # "inclusive" end is start + length - 1
                        end = start + record.annotations["size"] - 1

                        if not (start == rec_start and end == rec_end):
                            raise ValueError(
                                "Expected %s-%s @ offset %s, found %s-%s"
                                % (rec_start, rec_end, offset, start, end)
                            )

                yield fetched

    def get_spliced(self, starts, ends, strand=1):
        """Return a multiple alignment of the exact sequence range provided.

        Accepts two lists of start and end positions on target_seqname, representing
        exons to be spliced in silico.  Returns a *MultipleSeqAlignment* of the
        desired sequences spliced together.

        *starts* should be a list of 0-based start coordinates of segments in the reference.
        *ends* should be the list of the corresponding segment ends
        (in the half-open UCSC convention:
        http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/).

        *strand* must be 1 or -1; the result is reverse-complemented when it
        differs from the strand of the target sequence in the MAF file.

        To ask for the alignment portion corresponding to the first 100
        nucleotides of the reference sequence, you would use
        ``get_spliced([0], [100])``
        """
        # validate strand
        if strand not in (1, -1):
            raise ValueError("Strand must be 1 or -1, got %s" % strand)

        # pull all alignments that span the desired intervals
        fetched = list(self.search(starts, ends))

        # keep track of the expected letter count
        # (sum of lengths of [start, end) segments,
        # where [start, end) half-open)
        expected_letters = sum(end - start for start, end in zip(starts, ends))

        # if there's no alignment, return filler for the assembly of the length given
        if len(fetched) == 0:
            return MultipleSeqAlignment(
                [SeqRecord(Seq("N" * expected_letters), id=self._target_seqname)]
            )

        # find the union of all IDs in these alignments
        all_seqnames = {sequence.id for multiseq in fetched for sequence in multiseq}

        # split every record by base position
        # key: sequence name
        # value: dictionary
        #        key: position in the reference sequence
        #        value: letter(s) (including letters
        #               aligned to the "-" preceding the letter
        #               at the position in the reference, if any)
        split_by_position = {seq_name: {} for seq_name in all_seqnames}

        # keep track of what the total number of (unspliced) letters should be
        total_rec_length = 0

        # track first strand encountered on the target seqname
        ref_first_strand = None

        for multiseq in fetched:
            # find the target_seqname in this MultipleSeqAlignment and use it to
            # set the parameters for the rest of this iteration
            for seqrec in multiseq:
                if seqrec.id == self._target_seqname:
                    try:
                        if ref_first_strand is None:
                            ref_first_strand = seqrec.annotations["strand"]

                            if ref_first_strand not in (1, -1):
                                raise ValueError("Strand must be 1 or -1")
                        elif ref_first_strand != seqrec.annotations["strand"]:
                            raise ValueError(
                                "Encountered strand='%s' on target seqname, "
                                "expected '%s'"
                                % (seqrec.annotations["strand"], ref_first_strand)
                            )
                    except KeyError:
                        raise ValueError(
                            "No strand information for target seqname (%s)"
                            % self._target_seqname
                        ) from None
                    # length including gaps (i.e. alignment length)
                    rec_length = len(seqrec)
                    rec_start = seqrec.annotations["start"]
                    ungapped_length = seqrec.annotations["size"]
                    # inclusive end in zero-based coordinates of the reference
                    rec_end = rec_start + ungapped_length - 1
                    # This is length in terms of actual letters in the reference
                    total_rec_length += ungapped_length

                    # blank out these positions for every seqname
                    for other in multiseq:
                        for pos in range(rec_start, rec_end + 1):
                            split_by_position[other.id][pos] = ""

                    break
            # for/else: runs only when the loop above found no target record
            else:
                raise ValueError(
                    "Did not find %s in alignment bundle" % (self._target_seqname,)
                )

            # the true, chromosome/contig/etc position in the target seqname
            real_pos = rec_start

            # loop over the alignment to fill split_by_position
            for gapped_pos in range(0, rec_length):
                for seqrec in multiseq:
                    # keep track of this position's value for the target seqname
                    if seqrec.id == self._target_seqname:
                        track_val = seqrec.seq[gapped_pos]

                    # Here, a real_pos that corresponds to just after a series of "-"
                    # in the reference will "accumulate" the letters found in other sequences
                    # in front of the "-"s
                    split_by_position[seqrec.id][real_pos] += seqrec.seq[gapped_pos]

                # increment the real_pos counter only when non-gaps are found in
                # the target_seqname, and we haven't reached the end of the record
                if track_val != "-" and real_pos < rec_end:
                    real_pos += 1

        # make sure the number of bp entries equals the sum of the record lengths
        if len(split_by_position[self._target_seqname]) != total_rec_length:
            raise ValueError(
                "Target seqname (%s) has %s records, expected %s"
                % (
                    self._target_seqname,
                    len(split_by_position[self._target_seqname]),
                    total_rec_length,
                )
            )

        # translates a position in the target_seqname sequence to its gapped length
        realpos_to_len = {
            pos: len(gapped_fragment)
            for pos, gapped_fragment in split_by_position[self._target_seqname].items()
            if len(gapped_fragment) > 1
        }

        # splice together the exons
        subseq = {}

        for seqid in all_seqnames:
            seq_split = split_by_position[seqid]
            seq_splice = []

            filler_char = "N" if seqid == self._target_seqname else "-"

            # iterate from start to end, taking bases from split_by_position when
            # they exist, using N or - for gaps when there is no alignment.
            append = seq_splice.append

            for exonstart, exonend in zip(starts, ends):
                # exonend is exclusive
                for real_pos in range(exonstart, exonend):
                    # if this seqname has this position, add it
                    if real_pos in seq_split:
                        append(seq_split[real_pos])
                    # if not, but it's in the target_seqname, add length-matched filler
                    elif real_pos in realpos_to_len:
                        append(filler_char * realpos_to_len[real_pos])
                    # it's not in either, so add a single filler character
                    else:
                        append(filler_char)

            subseq[seqid] = "".join(seq_splice)

        # make sure we're returning the right number of letters
        if len(subseq[self._target_seqname].replace("-", "")) != expected_letters:
            raise ValueError(
                "Returning %s letters for target seqname (%s), expected %s"
                % (
                    len(subseq[self._target_seqname].replace("-", "")),
                    self._target_seqname,
                    expected_letters,
                )
            )

        # check to make sure all sequences are the same length as the target seqname
        ref_subseq_len = len(subseq[self._target_seqname])

        for seqid, seq in subseq.items():
            if len(seq) != ref_subseq_len:
                raise ValueError(
                    "Returning length %s for %s, expected %s"
                    % (len(seq), seqid, ref_subseq_len)
                )

        # finally, build a MultipleSeqAlignment object for our final sequences
        result_multiseq = []

        for seqid, seq in subseq.items():
            seq = Seq(seq)

            seq = seq if strand == ref_first_strand else seq.reverse_complement()

            result_multiseq.append(SeqRecord(seq, id=seqid, name=seqid, description=""))

        return MultipleSeqAlignment(result_multiseq)

    def __repr__(self):
        """Return a string representation of the index."""
        return "MafIO.MafIndex(%r, target_seqname=%r)" % (
            self._maf_fp.name,
            self._target_seqname,
        )

    def __len__(self):
        """Return the number of records in the index."""
        return self._record_count
+ +For example, consider a progressiveMauve alignment file containing the following:: + + #FormatVersion Mauve1 + #Sequence1File a.fa + #Sequence1Entry 1 + #Sequence1Format FastA + #Sequence2File b.fa + #Sequence2Entry 2 + #Sequence2Format FastA + #Sequence3File c.fa + #Sequence3Entry 3 + #Sequence3Format FastA + #BackboneFile three.xmfa.bbcols + > 1:0-0 + a.fa + -------------------------------------------------------------------------------- + -------------------------------------------------------------------------------- + -------------------------------------------------------------------------------- + > 2:5417-5968 + b.fa + TTTAAACATCCCTCGGCCCGTCGCCCTTTTATAATAGCAGTACGTGAGAGGAGCGCCCTAAGCTTTGGGAAATTCAAGC- + -------------------------------------------------------------------------------- + CTGGAACGTACTTGCTGGTTTCGCTACTATTTCAAACAAGTTAGAGGCCGTTACCTCGGGCGAACGTATAAACCATTCTG + > 3:9476-10076 - c.fa + TTTAAACACCTTTTTGGATG--GCCCAGTTCGTTCAGTTGTG-GGGAGGAGATCGCCCCAAACGTATGGTGAGTCGGGCG + TTTCCTATAGCTATAGGACCAATCCACTTACCATACGCCCGGCGTCGCCCAGTCCGGTTCGGTACCCTCCATGACCCACG + ---------------------------------------------------------AAATGAGGGCCCAGGGTATGCTT + = + > 2:5969-6015 + b.fa + ----------------------- + GGGCGAACGTATAAACCATTCTG + > 3:9429-9476 - c.fa + TTCGGTACCCTCCATGACCCACG + AAATGAGGGCCCAGGGTATGCTT + +This is a multiple sequence alignment with multiple aligned sections, so you +would probably load this using the Bio.AlignIO.parse() function: + + >>> from Bio import AlignIO + >>> align = AlignIO.parse("Mauve/simple_short.xmfa", "mauve") + >>> alignments = list(align) + >>> for aln in alignments: + ... print(aln) + ... 
# Sequence header line inside an XMFA block, as produced by Mauve itself:
#   > <id>:<start>-<end> <strand> <name>
# The group names below are the keys MauveIterator looks up with m.group().
XMFA_HEADER_REGEX = re.compile(
    r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) (?P<name>.*)"
)
# Same header followed by " # <realname>", i.e. the layout of ID_LINE_FMT.
# This pattern must be tried first: the plain pattern above would otherwise
# swallow the "# ..." suffix into the name group.
XMFA_HEADER_REGEX_BIOPYTHON = re.compile(
    r"> (?P<id>\d+):(?P<start>\d+)-(?P<end>\d+) (?P<strand>[+-]) "
    r"(?P<name>[^#]*) # (?P<realname>.*)"
)
# Template for header lines; the trailing "# ..." field carries the record id.
ID_LINE_FMT = "> {seq_name}:{start}-{end} {strand} {filename} # {ugly_hack}"


def _identifier_split(identifier):
    """Split a ``name:start-end:strand`` identifier into its parts (PRIVATE).

    Returns a ``(name, start, end, strand)`` tuple. ``name`` and ``strand``
    are strings; ``start`` and ``end`` are integers, with ``start`` reduced
    by one and ``end`` returned unchanged.

    Raises ValueError if the identifier does not consist of exactly three
    colon-separated fields, or if the location field is not two
    dash-separated integers.
    """
    name, loc, strand = identifier.split(":")
    start, end = map(int, loc.split("-"))
    start -= 1
    return name, start, end, strand
class MauveWriter(SequentialAlignmentWriter):
    """Mauve/XMFA alignment writer."""

    def __init__(self, *args, **kwargs):
        """Initialize the class."""
        super().__init__(*args, **kwargs)
        # Both flags are per-file state: the format header and the special
        # first block are each emitted at most once.
        self._wrote_header = False
        self._wrote_first = False

    def write_alignment(self, alignment):
        """Write one alignment (one XMFA block) to the open handle.

        Each record is written as its header line followed by its sequence,
        and the block is closed with a line holding a single "=".

        Raises ValueError if the alignment has no records or zero columns.
        """
        count = len(alignment)
        self._length_of_sequences = alignment.get_alignment_length()

        # NOTE - only per-sequence annotation is written; there is no per
        # column or per alignment annotation.
        if count == 0:
            raise ValueError("Must have at least one sequence")
        if self._length_of_sequences == 0:
            raise ValueError("Non-empty sequences are required")

        write = self.handle.write
        if not self._wrote_header:
            self._wrote_header = True
            write("#FormatVersion Mauve1\n")
            # The SequenceNFile / SequenceNFormat headers are not written,
            # only one SequenceNEntry line per record.
            for number in range(1, count + 1):
                write("#Sequence%sEntry\t%s\n" % (number, number))

        for position, rec in enumerate(alignment):
            self._write_record(rec, record_idx=position)
        write("=\n")

    def _write_record(self, record, record_idx=0):
        """Write a single SeqRecord to the file (PRIVATE)."""
        if self._length_of_sequences != len(record.seq):
            raise ValueError("Sequences must all be the same length")

        # Numeric record names are used as the sequence number; anything
        # else falls back to the record's 1-based position in the alignment.
        try:
            seq_name = str(int(record.name))
        except ValueError:
            seq_name = str(record_idx + 1)

        annotations = record.annotations

        # A trailing "/{start}-{end}" must not end up in the XMFA file, so
        # strip it (in either the zero-based or one-based start flavour).
        if "start" in annotations and "end" in annotations:
            candidates = (
                "/%s-%s" % (annotations["start"], annotations["end"]),
                "/%s-%s" % (annotations["start"] + 1, annotations["end"]),
            )
            for suffix in candidates:
                if seq_name.endswith(suffix):
                    seq_name = seq_name[: -len(suffix)]

        has_location = (
            "start" in annotations
            and "end" in annotations
            and "strand" in annotations
        )
        if has_location:
            start = annotations["start"] + 1
            end = annotations["end"]
            strand = "+" if annotations["strand"] == 1 else "-"
        else:
            start, end, strand = 0, 0, "+"
        id_line = ID_LINE_FMT.format(
            seq_name=seq_name,
            start=start,
            end=end,
            strand=strand,
            filename=record.name + ".fa",
            ugly_hack=record.id,
        )

        is_empty = has_location and (":0-0 " in id_line or ":1-0 " in id_line)
        if not is_empty:
            # Normal case: header line, then the sequence in 80 column lines.
            self.handle.write(id_line.replace("\n", " ").replace("\r", " ") + "\n")
            for offset in range(0, len(record.seq), 80):
                self.handle.write("%s\n" % record.seq[offset : offset + 80])
            return

        # Empty sequences are skipped, except once: the first LCB written is
        # special and must list ALL sequences, for the Mauve GUI
        # http://darlinglab.org/mauve/user-guide/files.html#non-standard-xmfa-formatting-used-by-the-mauve-gui
        if self._wrote_first:
            return
        self._wrote_first = True
        placeholder = ID_LINE_FMT.format(
            seq_name=seq_name,
            start=0,
            end=0,
            strand="+",
            filename=record.name + ".fa",
            ugly_hack=record.id,
        )
        placeholder = placeholder.replace("\n", " ").replace("\r", " ")
        self.handle.write(placeholder + "\n\n")
class MauveIterator(AlignmentIterator):
    """Mauve xmfa alignment iterator."""

    # IDs seen so far, cached between __next__ calls. The list is created
    # per instance on first use; a list literal here would be shared by
    # every iterator in the process.
    _ids = None

    def __next__(self):
        """Parse the next alignment (one XMFA block) from the handle.

        Returns a MultipleSeqAlignment holding one record for every sequence
        that has a header line in this block. Each record carries "start",
        "end" and "strand" annotations; sequences with a header but no
        sequence text are filled with gap characters.

        Raises StopIteration when no further block is available, and
        ValueError for a malformed header line, sequence text before the
        first header, or sequences of differing length within a block.
        """
        if self._ids is None:
            self._ids = []

        handle = self.handle
        line = handle.readline()

        if not line:
            raise StopIteration

        # Strip out header comments
        while line and line.strip().startswith("#"):
            line = handle.readline()

        seq_parts = {}  # id -> list of sequence text fragments
        seq_regions = {}  # id -> dict of fields parsed from the header line
        latest_id = None

        while line:
            line = line.strip()

            if line.startswith("="):
                # There may be more data, but we've reached the end of this
                # alignment
                break
            elif line.startswith(">"):
                # Try the richer "... # realname" layout first.
                m = XMFA_HEADER_REGEX_BIOPYTHON.match(line)
                if not m:
                    m = XMFA_HEADER_REGEX.match(line)
                    if not m:
                        raise ValueError("Malformed header line: %s" % line)

                parsed_id = m.group("id")
                parsed_data = {}
                for key in ("start", "end", "id", "strand", "name", "realname"):
                    try:
                        value = m.group(key)
                    except IndexError:
                        # The plain pattern has no "realname" group.
                        # It's fine.
                        continue
                    if key == "start":
                        value = int(value)
                        # Convert to zero based counting
                        if value > 0:
                            value -= 1
                    elif key == "end":
                        value = int(value)
                    parsed_data[key] = value
                seq_regions[parsed_id] = parsed_data

                if parsed_id not in self._ids:
                    self._ids.append(parsed_id)

                seq_parts.setdefault(parsed_id, [])
                latest_id = parsed_id
            else:
                if latest_id is None:
                    raise ValueError("Saw sequence before definition line")
                seq_parts[latest_id].append(line)
            line = handle.readline()

        seqs = {key: "".join(parts) for key, parts in seq_parts.items()}

        self.ids = self._ids
        self.sequences = seqs

        if not (self._ids and seqs):
            raise StopIteration

        alignment_length = max(map(len, seqs.values()))
        records = []
        for seq_id in self._ids:
            # Missing or empty sequences become all-gap rows.
            seq = seqs.get(seq_id) or "-" * alignment_length

            if alignment_length != len(seq):
                raise ValueError(
                    "Sequences have different lengths, or repeated identifier"
                )

            # Sometimes we don't see a particular sequence in the
            # alignment, so we skip that record since it isn't present in
            # that LCB/alignment
            if seq_id not in seq_regions:
                continue
            region = seq_regions[seq_id]

            if "realname" in region:
                corrected_id = region["realname"]
            else:
                corrected_id = region["name"]
            if region["start"] != 0 or region["end"] != 0:
                # Append the coordinates unless the name already has them.
                suffix = "/{start}-{end}".format(**region)
                if suffix not in corrected_id:
                    corrected_id += suffix

            record = SeqRecord(Seq(seq), id=corrected_id, name=seq_id)
            record.annotations["start"] = region["start"]
            record.annotations["end"] = region["end"]
            record.annotations["strand"] = 1 if region["strand"] == "+" else -1

            records.append(record)
        return MultipleSeqAlignment(records)
class MsfIterator(AlignmentIterator):
    """GCG MSF alignment iterator."""

    _header = None  # for caching lines between __next__ calls

    def __next__(self):
        """Parse the next alignment from the handle.

        Returns a MultipleSeqAlignment. Each record stores its ``Weight:``
        value under ``annotations["weight"]``, and the ``~`` and ``.`` gap
        characters are replaced by ``-``. Sequences that stop early but match
        their own declared ``Len:`` are gap padded, with a
        BiopythonParserWarning.

        Raises StopIteration at end of file, ValueError when the input does
        not follow the expected MSF layout, and NotImplementedError if a
        sequence name contains a space.
        """
        handle = self.handle

        if self._header is None:
            line = handle.readline()
        else:
            # Header we saved from when we were parsing
            # the previous alignment.
            line = self._header
            self._header = None

        if not line:
            raise StopIteration

        # Whitelisted headers we know about.
        known_headers = ["!!NA_MULTIPLE_ALIGNMENT", "!!AA_MULTIPLE_ALIGNMENT", "PileUp"]
        # Examples in "Molecular Biology Software Training Manual GCG version 10"
        # by BBSRC Biosciences IT Services (BITS), Harpenden, UK, Copyright 1996-2001
        # would often start as follows:
        #
        # !!AA_MUTIPLE_ALIGNMENT 1.0
        # PileUp of: @/usr/users2/culhane/...
        #
        # etc with other seemingly free format text before getting to the
        # MSF/Type/Check line and the following Name: lines block and // line.
        #
        # MUSCLE just has a line "PileUp", while other sources just use the line
        # "!!AA_MULTIPLE_ALIGNMENT" (amino acid) or "!!NA_MULTIPLE_ALIGNMENT"
        # (nucleotide).
        first_words = line.split()
        if not first_words or first_words[0] not in known_headers:
            # A blank first line is reported like any other unknown header
            # rather than escaping as an IndexError.
            raise ValueError(
                "%s is not a known GCG MSF header: %s"
                % (
                    first_words[0] if first_words else repr(line),
                    ", ".join(known_headers),
                )
            )

        while line and " MSF: " not in line:
            line = handle.readline()

        if not line:
            raise ValueError("Reached end of file without MSF/Type/Check header line")

        # The MSF/Type/Check line ends with ".." and looks like one of:
        #
        # localpileup_17.msf MSF: 195 Type: P January 6, 2000 15:41 Check: 4365 ..
        # MSF: 689 Type: N Check: 0000 ..                    (MUSCLE v3.8)
        # stdout MSF: 633 Type: N 01/08/19 CompCheck: 8543 .. (EMBOSS v6.6.0.0)
        #
        # By observation, the MSF value is the column count, type is N
        # (nucleotide) or P (protein / amino acid). EMBOSS uses CompCheck:
        # rather than Check:, possibly a bug, but it is accepted here.
        parts = line.strip("\n").split()
        offset = parts.index("MSF:")
        if (
            len(parts) < offset + 4  # too short to hold "MSF: n Type: x"
            or parts[offset + 2] != "Type:"
            or parts[-3] not in ("Check:", "CompCheck:")
            or parts[-1] != ".."
        ):
            raise ValueError(
                "GCG MSF header line should be "
                "'<optional text> MSF: <int> Type: <letter> <optional date> Check: <int> ..', "
                " not: %r" % line
            )
        try:
            aln_length = int(parts[offset + 1])
        except ValueError:
            aln_length = -1
        if aln_length < 0:
            raise ValueError(
                "GCG MSF header line should have MSF: <int> for column count, not %r"
                % parts[offset + 1]
            )
        seq_type = parts[offset + 3]
        if seq_type not in ["P", "N"]:
            raise ValueError(
                "GCG MSF header line should have 'Type: P' (protein) "
                "or 'Type: N' (nucleotide), not 'Type: %s'" % seq_type
            )

        # There should be a blank line after that header line, then the Name:
        # lines, e.g.
        #
        # Name: G26680 Len: 633 Check: 4334 Weight: 1.00
        #
        # In a possible bug, T-COFFEE v12.00 adds 'oo' after the names:
        #
        # Name: AK1H_ECOLI/1-378 oo Len: 628 Check: 3643 Weight: 1.000
        #
        # The block is terminated by a // line.
        ids = []
        lengths = []
        checks = []
        weights = []
        line = handle.readline()
        while line and line.strip() != "//":
            line = handle.readline()
            if line.strip().startswith("Name: "):
                if " Len: " in line and " Check: " in line and " Weight: " in line:
                    rest = line[line.index("Name: ") + 6 :].strip()
                    name, rest = rest.split(" Len: ")
                    length, rest = rest.split(" Check: ")
                    check, weight = rest.split(" Weight: ")
                    name = name.strip()
                    if name.endswith(" oo"):
                        # T-COFFEE oddity, ignore this
                        name = name[:-3]
                    if name in ids:
                        raise ValueError("Duplicated ID of %r" % name)
                    if " " in name:
                        raise NotImplementedError("Space in ID %r" % name)
                    ids.append(name)
                    # Expect aln_length <= int(length.strip()), see below
                    lengths.append(int(length.strip()))
                    checks.append(int(check.strip()))
                    weights.append(float(weight.strip()))
                else:
                    raise ValueError("Malformed GCG MSF name line: %r" % line)
        if not line:
            raise ValueError("End of file while looking for end of header // line.")
        if not ids:
            # Without this, max(lengths) below fails with an opaque message.
            raise ValueError("GCG MSF header has no Name: lines before the // line.")

        if aln_length != max(lengths):
            # In broken examples from IMGTHLA was possible to continue
            # https://github.com/ANHIG/IMGTHLA/issues/201
            max_length = max(lengths)
            max_count = sum(1 for _ in lengths if _ == max_length)
            raise ValueError(
                "GCG MSF header said alignment length %i, but %s of %i sequences said Len: %s"
                % (aln_length, max_count, len(ids), max_length)
            )

        line = handle.readline()
        if not line:
            raise ValueError("End of file after // line, expected sequences.")
        if line.strip():
            raise ValueError("After // line, expected blank line before sequences.")

        # Now load the sequences, which come in blocks of up to 50 columns
        seqs = [[] for _ in ids]  # list of empty lists
        completed_length = 0
        while completed_length < aln_length:
            # Last column a coordinate line for this block may name
            block_end = min(completed_length + 50, aln_length)
            # Note might have a coordinate header line (seems to be optional)
            for idx, name in enumerate(ids):
                line = handle.readline()
                if idx == 0 and not line.strip():
                    # T-COFFEE uses two blank lines between blocks, rather than one
                    while line and not line.strip():
                        line = handle.readline()
                if not line:
                    raise ValueError("End of file where expecting sequence data.")
                words = line.strip().split()
                # Should we use column numbers, rather than assuming no spaces in names?
                if idx == 0 and words and words[0] != name:
                    # Hopefully this is a coordinate header before the first seq
                    try:
                        coord = int(words[0])
                    except ValueError:
                        coord = -1
                    if coord != completed_length + 1:
                        raise ValueError(
                            "Expected GCG MSF coordinate line starting %i, got: %r"
                            % (completed_length + 1, line)
                        )
                    if len(words) > 1:
                        # Final block usually not full 50 chars, so expect start only.
                        if len(words) != 2:
                            coord = -1
                        else:
                            try:
                                coord = int(words[1])
                            except ValueError:
                                coord = -1
                        if coord != block_end:
                            raise ValueError(
                                "Expected GCG MSF coordinate line %i to %i, got: %r"
                                % (completed_length + 1, block_end, line)
                            )
                    line = handle.readline()
                    words = line.strip().split()
                # Dealt with any coordinate header line, should now be sequence
                if not words:
                    # Should be sequence here, but perhaps its a short one?
                    if (
                        lengths[idx] < aln_length
                        and len("".join(seqs[idx])) == lengths[idx]
                    ):
                        # Is this actually allowed in the format? Personally I would
                        # expect a line with name and a block of trailing ~ here.
                        pass
                    else:
                        raise ValueError(
                            "Expected sequence for %s, got: %r" % (name, line)
                        )
                elif words[0] == name:
                    if len(words) < 2:
                        # Name present but no sequence text after it.
                        raise ValueError(
                            "Expected sequence for %r, got: %r" % (name, line)
                        )
                    seqs[idx].extend(words[1:])
                else:
                    raise ValueError("Expected sequence for %r, got: %r" % (name, line))
            # TODO - check the sequence lengths thus far are consistent
            # with blocks of 50?
            completed_length += 50
            line = handle.readline()
            if line.strip():
                raise ValueError("Expected blank line, got: %r" % line)

        # Skip over any whitespace at the end...
        while True:
            line = handle.readline()
            if not line:
                # End of file, no more alignments
                break
            elif not line.strip():
                # Blank line, ignore
                pass
            elif line.strip().split()[0] in known_headers:
                # Looks like the start of another alignment:
                self._header = line
                break
            else:
                raise ValueError("Unexpected line after GCG MSF alignment: %r" % line)

        # Combine list of strings into single string, remap gaps
        seqs = ["".join(s).replace("~", "-").replace(".", "-") for s in seqs]

        # Apply any trailing padding for short sequences
        padded = False
        for idx, (length, s) in enumerate(zip(lengths, seqs)):
            if len(s) < aln_length and len(s) == length:
                padded = True
                seqs[idx] = s + "-" * (aln_length - len(s))
        if padded:
            import warnings
            from Bio import BiopythonParserWarning

            warnings.warn(
                "One of more alignment sequences were truncated and have been gap padded",
                BiopythonParserWarning,
            )

        records = (
            SeqRecord(Seq(s), id=i, name=i, description=i, annotations={"weight": w})
            for (i, s, w) in zip(ids, seqs, weights)
        )

        # This will check alignment lengths are self-consistent:
        align = MultipleSeqAlignment(records)
        # Check matches the header:
        if align.get_alignment_length() != aln_length:
            raise ValueError(
                "GCG MSF headers said alignment length %i, but have %i"
                % (aln_length, align.get_alignment_length())
            )
        return align
# This is a generator function!
def NexusIterator(handle, seq_count=None):
    """Return SeqRecord objects from a Nexus file.

    Thus uses the Bio.Nexus module to do the hard work.

    You are expected to call this function via Bio.SeqIO or Bio.AlignIO
    (and not use it directly).

    Arguments:
     - handle - input handle, passed straight to Bio.Nexus.
     - seq_count - optional expected number of sequences; when given and
       non-zero a mismatch raises ValueError.

    NOTE - We only expect ONE alignment matrix per Nexus file,
    meaning this iterator will only yield one MultipleSeqAlignment
    (or nothing at all if the file holds no matrix).
    """
    n = Nexus.Nexus(handle)
    if not n.matrix:
        # No alignment found
        return

    # Bio.Nexus deals with duplicated names by adding a '.copy' suffix.
    # The original names and the modified names are kept in these two lists:
    assert len(n.unaltered_taxlabels) == len(n.taxlabels)

    if seq_count and seq_count != len(n.unaltered_taxlabels):
        raise ValueError(
            "Found %i sequences, but seq_count=%i"
            % (len(n.unaltered_taxlabels), seq_count)
        )

    # TODO - Can we extract any annotation too?
    molecule_types = {
        "dna": "DNA",
        "nucleotide": "DNA",
        "rna": "RNA",
        "protein": "protein",
    }
    molecule_type = molecule_types.get(n.datatype)

    records = (
        SeqRecord(
            n.matrix[new_name],
            id=new_name,
            name=old_name,
            description="",
            # A fresh dict per record: passing one shared dict would make
            # every record's annotations the same object.
            annotations={"molecule_type": molecule_type} if molecule_type else None,
        )
        for old_name, new_name in zip(n.unaltered_taxlabels, n.taxlabels)
    )
    # All done
    yield MultipleSeqAlignment(records)
class NexusWriter(AlignmentWriter):
    """Nexus alignment writer.

    Note that Nexus files are only expected to hold ONE alignment
    matrix.

    You are expected to call this class via the Bio.AlignIO.write() or
    Bio.SeqIO.write() functions.
    """

    def write_file(self, alignments):
        """Use this to write an entire file containing the given alignments.

        Arguments:
         - alignments - A list or iterator returning MultipleSeqAlignment objects.
           This should hold ONE and only one alignment.

        Returns the number of alignments written (0 or 1); raises ValueError
        if more than one alignment is supplied.
        """
        missing = object()
        source = iter(alignments)  # Could have been a list

        first = next(source, missing)
        if first is missing:
            # Nothing to write!
            return 0

        # Check there is only one alignment...
        if next(source, missing) is not missing:
            raise ValueError("We can only write one Alignment to a Nexus file.")

        # Good. Actually write the single alignment,
        self.write_alignment(first)
        return 1  # we only support writing one alignment!

    def write_alignment(self, alignment, interleave=None):
        """Write an alignment to file.

        Creates an empty Nexus object, adds the sequences
        and then gets Nexus to prepare the output.
        Default interleave behaviour: Interleave if columns > 1000
        --> Override with interleave=[True/False]
        """
        if len(alignment) == 0:
            raise ValueError("Must have at least one sequence")
        columns = alignment.get_alignment_length()
        if columns == 0:
            raise ValueError("Non-empty sequences are required")

        datatype = self._classify_mol_type_for_nexus(alignment)
        skeleton = (
            "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=%s; end;"
            % datatype
        )
        nexus = Nexus.Nexus(skeleton)
        for rec in alignment:
            # Sanity test sequences (should this be even stricter?)
            if datatype == "dna" and "U" in rec.seq:
                raise ValueError(f"{rec.id} contains U, but DNA alignment")
            if datatype == "rna" and "T" in rec.seq:
                raise ValueError(f"{rec.id} contains T, but RNA alignment")
            nexus.add_sequence(rec.id, str(rec.seq))

        # Note: MrBayes may choke on large alignments if not interleaved
        if interleave is None:
            interleave = columns > 1000
        nexus.write_nexus_data(self.handle, interleave=interleave)

    def _classify_mol_type_for_nexus(self, alignment):
        """Return 'protein', 'dna', or 'rna' based on records' molecule type (PRIVATE).

        All the records must have a molecule_type annotation, and they must
        agree.

        Raises an exception if this is not possible.
        """
        mol_types = {rec.annotations.get("molecule_type", None) for rec in alignment}
        # Substring tests, so e.g. a mix of "DNA" and "gDNA" counts as DNA
        # and a mix of "RNA" and "mRNA" counts as RNA.
        for marker, nexus_type in (
            ("DNA", "dna"),
            ("RNA", "rna"),
            ("protein", "protein"),
        ):
            if all(value and marker in value for value in mol_types):
                return nexus_type
        raise ValueError("Need the molecule type to be defined")
The PHYLIP documentation from 3.3 to 3.69 +http://evolution.genetics.washington.edu/phylip/doc/sequence.html says: + +"a period was also previously allowed but it is no longer allowed, +because it sometimes is used in different senses in other programs" + +Biopython 1.58 or later treats dots/periods in the sequence as invalid, both +for reading and writing. Older versions did nothing special with a dot/period. +""" +import string + +from Bio.Align import MultipleSeqAlignment +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + +from .Interfaces import AlignmentIterator +from .Interfaces import SequentialAlignmentWriter + + +_PHYLIP_ID_WIDTH = 10 +_NO_DOTS = "PHYLIP format no longer allows dots in sequence" + + +class PhylipWriter(SequentialAlignmentWriter): + """Phylip alignment writer.""" + + def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH): + """Use this to write (another) single alignment to an open file. + + This code will write interlaced alignments (when the sequences are + longer than 50 characters). + + Note that record identifiers are strictly truncated to id_width, + defaulting to the value required to comply with the PHYLIP standard. + + For more information on the file format, please see: + http://evolution.genetics.washington.edu/phylip/doc/sequence.html + http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles + """ + handle = self.handle + + if len(alignment) == 0: + raise ValueError("Must have at least one sequence") + length_of_seqs = alignment.get_alignment_length() + for record in alignment: + if length_of_seqs != len(record.seq): + raise ValueError("Sequences must all be the same length") + if length_of_seqs <= 0: + raise ValueError("Non-empty sequences are required") + + # Check for repeated identifiers... 
+ # Apply this test *after* cleaning the identifiers + names = [] + seqs = [] + for record in alignment: + """ + Quoting the PHYLIP version 3.6 documentation: + + The name should be ten characters in length, filled out to + the full ten characters by blanks if shorter. Any printable + ASCII/ISO character is allowed in the name, except for + parentheses ("(" and ")"), square brackets ("[" and "]"), + colon (":"), semicolon (";") and comma (","). If you forget + to extend the names to ten characters in length by blanks, + the program [i.e. PHYLIP] will get out of synchronization + with the contents of the data file, and an error message will + result. + + Note that Tab characters count as only one character in the + species names. Their inclusion can cause trouble. + """ + name = sanitize_name(record.id, id_width) + if name in names: + raise ValueError( + "Repeated name %r (originally %r), possibly due to truncation" + % (name, record.id) + ) + names.append(name) + sequence = str(record.seq) + if "." in sequence: + # Do this check here (once per record, not once per block) + raise ValueError(_NO_DOTS) + seqs.append(sequence) + + # From experimentation, the use of tabs is not understood by the + # EMBOSS suite. The nature of the expected white space is not + # defined in the PHYLIP documentation, simply "These are in free + # format, separated by blanks". We'll use spaces to keep EMBOSS + # happy. + handle.write(" %i %s\n" % (len(alignment), length_of_seqs)) + block = 0 + while True: + for name, sequence in zip(names, seqs): + if block == 0: + # Write name (truncated/padded to id_width characters) + # Now truncate and right pad to expected length. + handle.write(name[:id_width].ljust(id_width)) + else: + # write indent + handle.write(" " * id_width) + # Write five chunks of ten letters per line... + for chunk in range(0, 5): + i = block * 50 + chunk * 10 + seq_segment = sequence[i : i + 10] + # TODO - Force any gaps to be '-' character? + # TODO - How to cope with '?' 
class PhylipIterator(AlignmentIterator):
    """Reads a Phylip alignment file returning a MultipleSeqAlignment iterator.

    Record identifiers are limited to at most 10 characters.

    It only copes with interlaced phylip files! Sequential files won't work
    where the sequences are split over multiple lines.

    For more information on the file format, please see:
    http://evolution.genetics.washington.edu/phylip/doc/sequence.html
    http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
    """

    # Default truncation length
    id_width = _PHYLIP_ID_WIDTH

    _header = None  # for caching lines between __next__ calls

    def _is_header(self, line):
        """Return True if the line holds exactly two integers (PRIVATE).

        Used to recognise the header of a further alignment concatenated
        onto the one currently being parsed.
        """
        parts = line.split()
        if len(parts) != 2:
            return False  # First line should have two integers
        try:
            int(parts[0])
            int(parts[1])
        except ValueError:
            return False  # First line should have two integers
        return True

    def _split_id(self, line):
        """Extract the sequence ID from a Phylip line (PRIVATE).

        Returning a tuple containing: (sequence_id, sequence_residues)

        The first ``id_width`` characters in the line are the sequence id,
        the remainder are sequence data (with any spaces removed).
        """
        seq_id = line[: self.id_width].strip()
        seq = line[self.id_width :].strip().replace(" ", "")
        return seq_id, seq

    def __next__(self):
        """Parse the next alignment from the handle.

        Returns a MultipleSeqAlignment built from the header line, the first
        block of named rows and any further unnamed blocks.

        Raises StopIteration when the handle has no more data, and ValueError
        when the header is not two integers, when the number of records
        differs from ``records_per_alignment``, when a sequence contains "."
        or when the file ends part way through a block.
        """
        handle = self.handle

        if self._header is None:
            line = handle.readline()
        else:
            # Header we saved from when we were parsing
            # the previous alignment.
            line = self._header
            self._header = None

        if not line:
            raise StopIteration
        parts = line.split()
        if len(parts) != 2:
            raise ValueError("First line should have two integers")
        try:
            number_of_seqs = int(parts[0])
            # The declared sequence length must be an integer, but this
            # parser does not otherwise use it.
            int(parts[1])
        except ValueError:
            raise ValueError("First line should have two integers") from None

        if (
            self.records_per_alignment is not None
            and self.records_per_alignment != number_of_seqs
        ):
            raise ValueError(
                "Found %i records in this alignment, told to expect %i"
                % (number_of_seqs, self.records_per_alignment)
            )

        ids = []
        seqs = []

        # By default, expects STRICT truncation / padding to 10 characters.
        # Does not require any whitespace between name and seq.
        for _ in range(number_of_seqs):
            line = handle.readline().rstrip()
            sequence_id, s = self._split_id(line)
            ids.append(sequence_id)
            if "." in s:
                raise ValueError(_NO_DOTS)
            seqs.append([s])

        # Look for further blocks
        line = ""
        while True:
            # Skip any blank lines between blocks...
            while "" == line.strip():
                line = handle.readline()
                if not line:
                    break  # end of file
            if not line:
                break  # end of file

            if self._is_header(line):
                # Looks like the start of a concatenated alignment
                self._header = line
                break

            # Continuation block: one unnamed row per record, in the same
            # order as the first block.
            for i in range(number_of_seqs):
                s = line.strip().replace(" ", "")
                if "." in s:
                    raise ValueError(_NO_DOTS)
                seqs[i].append(s)
                line = handle.readline()
                if (not line) and i + 1 < number_of_seqs:
                    raise ValueError("End of file mid-block")
            if not line:
                break  # end of file

        records = (
            SeqRecord(
                Seq("".join(chunks)),
                id=seq_id,
                name=seq_id,
                description=seq_id,
            )
            for (seq_id, chunks) in zip(ids, seqs)
        )
        return MultipleSeqAlignment(records)
class SequentialPhylipWriter(SequentialAlignmentWriter):
    """Sequential Phylip format Writer."""

    def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
        """Write a Phylip alignment to the handle.

        Each record is written on one line: its sanitised name, truncated
        and right-padded to ``id_width`` characters, followed by the whole
        sequence.

        All validation is done before anything is written, so a rejected
        alignment leaves the handle untouched.

        Raises ValueError if the alignment is empty, if the sequences differ
        in length or are empty, if two names collide after sanitising, or if
        a sequence contains ".".
        """
        handle = self.handle

        if len(alignment) == 0:
            raise ValueError("Must have at least one sequence")
        length_of_seqs = alignment.get_alignment_length()
        for record in alignment:
            if length_of_seqs != len(record.seq):
                raise ValueError("Sequences must all be the same length")
        if length_of_seqs <= 0:
            raise ValueError("Non-empty sequences are required")

        # Check for repeated identifiers...
        # Apply this test *after* cleaning the identifiers
        names = []
        seen = set()  # same content as names, for constant-time lookup
        for record in alignment:
            # Either remove the banned characters, or map them to something
            # else like an underscore "_" or pipe "|" character...
            name = sanitize_name(record.id, id_width)
            if name in seen:
                raise ValueError(
                    "Repeated name %r (originally %r), possibly due to truncation"
                    % (name, record.id)
                )
            seen.add(name)
            names.append(name)

        # Reject dots up front (once per record) rather than while writing,
        # otherwise the header and earlier rows would already be on the
        # handle when the error is raised.
        sequences = []
        for record in alignment:
            sequence = str(record.seq)
            if "." in sequence:
                raise ValueError(_NO_DOTS)
            sequences.append(sequence)

        # From experimentation, the use of tabs is not understood by the
        # EMBOSS suite. The nature of the expected white space is not
        # defined in the PHYLIP documentation, simply "These are in free
        # format, separated by blanks". We'll use spaces to keep EMBOSS
        # happy.
        handle.write(" %i %s\n" % (len(alignment), length_of_seqs))
        for name, sequence in zip(names, sequences):
            handle.write(name[:id_width].ljust(id_width))
            # Write the entire sequence to one line (see sequential format
            # notes in the SequentialPhylipIterator docstring)
            handle.write(sequence)
            handle.write("\n")
+ + The sequential format carries the same restrictions as the normal + interleaved one, with the difference being that the sequences are listed + sequentially, each sequence written in its entirety before the start of + the next. According to the PHYLIP documentation for input file + formatting, newlines and spaces may optionally be entered at any point + in the sequences. + """ + + _header = None # for caching lines between __next__ calls + + def __next__(self): + """Parse the next alignment from the handle.""" + handle = self.handle + + if self._header is None: + line = handle.readline() + else: + # Header we saved from when we were parsing + # the previous alignment. + line = self._header + self._header = None + + if not line: + raise StopIteration + line = line.strip() + parts = [x for x in line.split() if x] + if len(parts) != 2: + raise ValueError("First line should have two integers") + try: + number_of_seqs = int(parts[0]) + length_of_seqs = int(parts[1]) + except ValueError: + raise ValueError("First line should have two integers") from None + + assert self._is_header(line) + + if ( + self.records_per_alignment is not None + and self.records_per_alignment != number_of_seqs + ): + raise ValueError( + "Found %i records in this alignment, told to expect %i" + % (number_of_seqs, self.records_per_alignment) + ) + + ids = [] + seqs = [] + + # By default, expects STRICT truncation / padding to 10 characters. + # Does not require any whitespace between name and seq. + for i in range(number_of_seqs): + line = handle.readline().rstrip() + sequence_id, s = self._split_id(line) + ids.append(sequence_id) + while len(s) < length_of_seqs: + # The sequence may be split into multiple lines + line = handle.readline().strip() + if not line: + break + if line == "": + continue + s = "".join([s, line.strip().replace(" ", "")]) + if len(s) > length_of_seqs: + raise ValueError( + "Found a record of length %i, " + "should be %i" % (len(s), length_of_seqs) + ) + if "." 
def sanitize_name(name, width=None):
    """Sanitise sequence identifier for output.

    Surrounding whitespace is stripped first. The characters "[", "]", "(",
    ")" and "," are then removed, and ":" and ";" are each replaced with
    "|". If "width" is given, the result is cut to at most that many
    characters.
    """
    # One pass: map ":" and ";" to "|", and delete the bracket/comma characters.
    table = str.maketrans(":;", "||", "[](),")
    cleaned = name.strip().translate(table)
    if width is None:
        return cleaned
    return cleaned[:width]
+ AP001509.1 UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGU + #=GR AP001509.1 SS -----------------<<<<<<<<---..<<-<<-------->>->>..-- + AE007476.1 AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGU + #=GR AE007476.1 SS -----------------<<<<<<<<-----<<.<<-------->>.>>---- + + #=GC SS_cons ......<<<<<<<.......>>>>>>>..>>>>>>>>............... + AP001509.1 CUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU + #=GR AP001509.1 SS -------<<<<<--------->>>>>--->>>>>>>>--------------- + AE007476.1 UUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU + #=GR AE007476.1 SS ------.<<<<<--------->>>>>.-->>>>>>>>--------------- + // + +This is a single multiple sequence alignment, so you would probably load this +using the Bio.AlignIO.read() function: + + >>> from Bio import AlignIO + >>> align = AlignIO.read("Stockholm/simple.sth", "stockholm") + >>> print(align) + Alignment with 2 rows and 104 columns + UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1 + AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1 + >>> for record in align: + ... print("%s %i" % (record.id, len(record))) + AP001509.1 104 + AE007476.1 104 + +In addition to the sequences themselves, this example alignment also includes +some GR lines for the secondary structure of the sequences. These are +strings, with one character for each letter in the associated sequence: + + >>> for record in align: + ... print(record.id) + ... print(record.seq) + ... 
print(record.letter_annotations['secondary_structure']) + AP001509.1 + UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU + -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>--------------- + AE007476.1 + AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU + -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>--------------- + +Any general annotation for each row is recorded in the SeqRecord's annotations +dictionary. Any per-column annotation for the entire alignment in in the +alignment's column annotations dictionary, such as the secondary structure +consensus in this example: + + >>> sorted(align.column_annotations.keys()) + ['secondary_structure'] + >>> align.column_annotations["secondary_structure"] + '.................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>...............' 
+ +You can output this alignment in many different file formats +using Bio.AlignIO.write(), or the MultipleSeqAlignment object's format method: + + >>> print(format(align, "fasta")) + >AP001509.1 + UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-A + GGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU + >AE007476.1 + AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAA + GGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU + + +Most output formats won't be able to hold the annotation possible in a +Stockholm file: + + >>> print(format(align, "stockholm")) + # STOCKHOLM 1.0 + #=GF SQ 2 + AP001509.1 UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU + #=GS AP001509.1 AC AP001509.1 + #=GS AP001509.1 DE AP001509.1 + #=GR AP001509.1 SS -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>--------------- + AE007476.1 AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU + #=GS AE007476.1 AC AE007476.1 + #=GS AE007476.1 DE AE007476.1 + #=GR AE007476.1 SS -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>--------------- + #=GC SS_cons .................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>............... + // + + +Note that when writing Stockholm files, AlignIO does not break long sequences +up and interleave them (as in the input file shown above). The standard +allows this simpler layout, and it is more likely to be understood by other +tools. + +Finally, as an aside, it can sometimes be useful to use Bio.SeqIO.parse() to +iterate over the alignment rows as SeqRecord objects - rather than working +with Alignnment objects. + + >>> from Bio import SeqIO + >>> for record in SeqIO.parse("Stockholm/simple.sth", "stockholm"): + ... print(record.id) + ... print(record.seq) + ... 
print(record.letter_annotations['secondary_structure']) + AP001509.1 + UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU + -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>--------------- + AE007476.1 + AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU + -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>--------------- + +Remember that if you slice a SeqRecord, the per-letter-annotations like the +secondary structure string here, are also sliced: + + >>> sub_record = record[10:20] + >>> print(sub_record.seq) + AUCGUUUUAC + >>> print(sub_record.letter_annotations['secondary_structure']) + -------<<< + +Likewise with the alignment object, as long as you are not dropping any rows, +slicing specific columns of an alignment will slice any per-column-annotations: + + >>> align.column_annotations["secondary_structure"] + '.................<<<<<<<<...<<<<<<<........>>>>>>>........<<<<<<<.......>>>>>>>..>>>>>>>>...............' 
class StockholmWriter(SequentialAlignmentWriter):
    """Stockholm/PFAM alignment writer."""

    # These dictionaries should be kept in sync with those
    # defined in the StockholmIterator class.
    pfam_gr_mapping = {
        "secondary_structure": "SS",
        "surface_accessibility": "SA",
        "transmembrane": "TM",
        "posterior_probability": "PP",
        "ligand_binding": "LI",
        "active_site": "AS",
        "intron": "IN",
    }
    # These GC mappings are in addition to *_cons in GR mapping:
    pfam_gc_mapping = {"reference_annotation": "RF", "model_mask": "MM"}
    # Following dictionary deliberately does not cover AC, DE or DR
    pfam_gs_mapping = {"organism": "OS", "organism_classification": "OC", "look": "LO"}

    def write_alignment(self, alignment):
        """Use this to write (another) single alignment to an open file.

        Each sequence is written together with its own annotation lines,
        instead of an annotation block followed by a sequence block. Column
        annotations known to the PFAM mappings follow as #=GC lines, and the
        alignment is closed with "//".

        Raises ValueError for an alignment with no records or with zero
        length sequences.
        """
        count = len(alignment)

        # Per-alignment state consulted by _write_record.
        self._length_of_sequences = alignment.get_alignment_length()
        self._ids_written = []

        if count == 0:
            raise ValueError("Must have at least one sequence")
        if self._length_of_sequences == 0:
            raise ValueError("Non-empty sequences are required")

        write = self.handle.write
        write("# STOCKHOLM 1.0\n")
        write("#=GF SQ %i\n" % count)
        for record in alignment:
            self._write_record(record)

        # This shouldn't be None... but just in case,
        if alignment.column_annotations:
            for key, markup in sorted(alignment.column_annotations.items()):
                if key in self.pfam_gc_mapping:
                    tag = self.pfam_gc_mapping[key]
                elif key in self.pfam_gr_mapping:
                    tag = self.pfam_gr_mapping[key] + "_cons"
                else:
                    # It doesn't follow the PFAM standards, so it is
                    # not written out.
                    continue
                write("#=GC %s %s\n" % (tag, markup))
        write("//\n")

    def _write_record(self, record):
        """Write a single SeqRecord to the file (PRIVATE).

        Raises ValueError if the record's length differs from the alignment
        length, or if its output name has already been written.
        """
        if self._length_of_sequences != len(record.seq):
            raise ValueError("Sequences must all be the same length")

        annotations = record.annotations
        write = self.handle.write

        # For the case for stockholm to stockholm, try and use record.name
        seq_name = record.id
        if (
            record.name is not None
            and "accession" in annotations
            and record.id == annotations["accession"]
        ):
            seq_name = record.name

        # In the Stockholm file format, spaces are not allowed in the id
        seq_name = seq_name.replace(" ", "_")

        # Add the /start-end suffix unless the name already ends with it.
        if "start" in annotations and "end" in annotations:
            suffix = "/%s-%s" % (annotations["start"], annotations["end"])
            if not seq_name.endswith(suffix):
                seq_name = seq_name + suffix

        if seq_name in self._ids_written:
            raise ValueError("Duplicate record identifier: %s" % seq_name)
        self._ids_written.append(seq_name)
        write("%s %s\n" % (seq_name, record.seq))

        # The recommended placement for GS lines (per sequence annotation)
        # is above the alignment (as a header block) or just below the
        # corresponding sequence.
        #
        # The recommended placement for GR lines (per sequence per column
        # annotation such as secondary structure) is just below the
        # corresponding sequence.
        #
        # We put both just below the corresponding sequence as this allows
        # us to write the file using a single pass through the records.

        # AC = Accession (falls back to the record id)
        if "accession" in annotations:
            write("#=GS %s AC %s\n" % (seq_name, self.clean(annotations["accession"])))
        elif record.id:
            write("#=GS %s AC %s\n" % (seq_name, self.clean(record.id)))

        # DE = description
        if record.description:
            write("#=GS %s DE %s\n" % (seq_name, self.clean(record.description)))

        # DR = database links
        for xref in record.dbxrefs:
            write("#=GS %s DR %s\n" % (seq_name, self.clean(xref)))

        # GS = other per sequence annotation; keys outside the PFAM
        # mapping are not written.
        for key, value in annotations.items():
            if key not in self.pfam_gs_mapping:
                continue
            data = self.clean(str(value))
            if data:
                write(
                    "#=GS %s %s %s\n"
                    % (seq_name, self.clean(self.pfam_gs_mapping[key]), data)
                )

        # GR = per row per column sequence annotation; skipped unless the
        # key is in the PFAM mapping and the markup is as long as the sequence.
        for key, value in record.letter_annotations.items():
            if key not in self.pfam_gr_mapping:
                continue
            if len(str(value)) != len(record.seq):
                continue
            data = self.clean(str(value))
            if data:
                write(
                    "#=GR %s %s %s\n"
                    % (seq_name, self.clean(self.pfam_gr_mapping[key]), data)
                )
def __next__(self):
    """Parse the next alignment from the handle.

    Reads from the "# STOCKHOLM 1.0" header up to the next header or the
    end of the file. Sequence lines sharing an identifier are concatenated
    (interleaved blocks), and "." in sequences is converted to "-".

    Returns a MultipleSeqAlignment. Raises StopIteration when the handle
    is exhausted or no sequences were found, and ValueError when the
    header is missing, a line cannot be split into its fields, the record
    count differs from ``records_per_alignment``, or sequence/markup
    lengths are inconsistent.
    """
    handle = self.handle

    if self._header is None:
        line = handle.readline()
    else:
        # Header we saved from when we were parsing
        # the previous alignment.
        line = self._header
        self._header = None

    if not line:
        # Empty file - just give up.
        raise StopIteration
    if line.strip() != "# STOCKHOLM 1.0":
        raise ValueError("Did not find STOCKHOLM header")

    # Note: If this file follows the PFAM conventions, there should be
    # a line containing the number of sequences, e.g. "#=GF SQ 67"
    # We do not check for this - perhaps we should, and verify that
    # if present it agrees with our parsing.

    seqs = {}
    ids = OrderedDict()  # Really only need an OrderedSet, but python lacks this
    gs = {}
    gr = {}
    gf = {}  # parsed, but not attached to the returned alignment
    gc = {}
    passed_end_alignment = False
    while True:
        line = handle.readline()
        if not line:
            break  # end of file
        line = line.strip()  # remove trailing \n
        if line == "# STOCKHOLM 1.0":
            self._header = line
            break
        elif line == "//":
            # The "//" line indicates the end of the alignment.
            # There may still be more meta-data
            passed_end_alignment = True
        elif line == "":
            # blank line, ignore
            pass
        elif line[0] != "#":
            # Sequence
            # Format: "<seqname> <sequence>"
            assert not passed_end_alignment
            parts = [x.strip() for x in line.split(" ", 1)]
            if len(parts) != 2:
                # This might be someone attempting to store a zero length sequence?
                raise ValueError(
                    "Could not split line into identifier and sequence:\n" + line
                )
            seq_id, seq = parts
            if seq_id not in ids:
                ids[seq_id] = True
            seqs.setdefault(seq_id, "")
            seqs[seq_id] += seq.replace(".", "-")
        elif len(line) >= 5:
            # Comment line or meta-data
            if line[:5] == "#=GF ":
                # Generic per-File annotation, free text
                # Format: "#=GF <feature> <free text>"
                # The free text can be empty, as with #=GS below.
                fields = line[5:].strip().split(None, 1)
                feature = fields[0]
                text = fields[1] if len(fields) == 2 else ""
                # Each feature key could be used more than once,
                # so store the entries as a list of strings.
                gf.setdefault(feature, []).append(text)
            elif line[:5] == "#=GC ":
                # Generic per-Column annotation, exactly 1 char per column
                # Format: "#=GC <feature> <exactly 1 char per column>"
                fields = line[5:].strip().split()
                if len(fields) != 2:
                    raise ValueError(
                        "Could not split line into feature and markup:\n" + line
                    )
                feature, text = fields
                if feature not in gc:
                    gc[feature] = ""
                gc[feature] += text.strip()  # append to any previous entry
                # Might be interleaved blocks, so can't check length yet
            elif line[:5] == "#=GS ":
                # Generic per-Sequence annotation, free text
                # Format: "#=GS <seqname> <feature> <free text>"
                fields = line[5:].strip().split(None, 2)
                if len(fields) == 3:
                    seq_id, feature, text = fields
                elif len(fields) == 2:
                    # Free text can sometimes be empty.
                    # See https://github.com/biopython/biopython/issues/2982 for more details
                    seq_id, feature = fields
                    text = ""
                else:
                    raise ValueError(
                        "Could not split line into identifier, feature and text:\n"
                        + line
                    )
                gs.setdefault(seq_id, {}).setdefault(feature, []).append(text)
            elif line[:5] == "#=GR ":
                # Generic per-Sequence AND per-Column markup
                # Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
                fields = line[5:].strip().split(None, 2)
                if len(fields) != 3:
                    raise ValueError(
                        "Could not split line into identifier, feature and markup:\n"
                        + line
                    )
                seq_id, feature, text = fields
                if seq_id not in gr:
                    gr[seq_id] = {}
                if feature not in gr[seq_id]:
                    gr[seq_id][feature] = ""
                gr[seq_id][feature] += text.strip()  # append to any previous entry
                # Might be interleaved blocks, so can't check length yet
        # Next line...

    assert len(seqs) <= len(ids)

    # Stored on the instance; _populate_meta_data below reads
    # self.seq_annotation and self.seq_col_annotation.
    self.ids = ids.keys()
    self.sequences = seqs
    self.seq_annotation = gs
    self.seq_col_annotation = gr

    if not (ids and seqs):
        raise StopIteration

    if (
        self.records_per_alignment is not None
        and self.records_per_alignment != len(ids)
    ):
        raise ValueError(
            "Found %i records in this alignment, told to expect %i"
            % (len(ids), self.records_per_alignment)
        )

    # Every row must match the length of the first sequence seen.
    alignment_length = len(next(iter(seqs.values())))
    records = []  # Alignment obj will put them all in a list anyway
    for seq_id in ids:
        seq = seqs[seq_id]
        if alignment_length != len(seq):
            raise ValueError(
                "Sequences have different lengths, or repeated identifier"
            )
        name, start, end = self._identifier_split(seq_id)
        record = SeqRecord(
            Seq(seq),
            id=seq_id,
            name=name,
            description=seq_id,
            annotations={"accession": name},
        )
        # Accession will be overridden by _populate_meta_data if an explicit
        # accession is provided:
        record.annotations["accession"] = name

        if start is not None:
            record.annotations["start"] = start
        if end is not None:
            record.annotations["end"] = end

        self._populate_meta_data(seq_id, record)
        records.append(record)

    for k, v in gc.items():
        if len(v) != alignment_length:
            raise ValueError(
                "%s length %i, expected %i" % (k, len(v), alignment_length)
            )
    alignment = MultipleSeqAlignment(records)

    for k, v in sorted(gc.items()):
        if k in self.pfam_gc_mapping:
            alignment.column_annotations[self.pfam_gc_mapping[k]] = v
        elif k.endswith("_cons") and k[:-5] in self.pfam_gr_mapping:
            alignment.column_annotations[self.pfam_gr_mapping[k[:-5]]] = v
        else:
            # Unrecognised features are kept under a "GC:"-prefixed key.
            alignment.column_annotations["GC:" + k] = v

    # TODO - Introduce an annotated alignment class?
    # For now, store the annotation a new private property:
    alignment._annotations = gr

    return alignment
+ if feature == "AC": # ACcession number + assert len(seq_data[feature]) == 1 + record.annotations["accession"] = seq_data[feature][0] + elif feature == "DE": # DEscription + record.description = "\n".join(seq_data[feature]) + elif feature == "DR": # Database Reference + # Should we try and parse the strings? + record.dbxrefs = seq_data[feature] + elif feature in self.pfam_gs_mapping: + record.annotations[self.pfam_gs_mapping[feature]] = ", ".join( + seq_data[feature] + ) + else: + # Ignore it? + record.annotations["GS:" + feature] = ", ".join(seq_data[feature]) + + # Now record the per-letter-annotations + seq_col_data = self._get_meta_data(identifier, self.seq_col_annotation) + for feature in seq_col_data: + # Note this dictionary contains strings! + if feature in self.pfam_gr_mapping: + record.letter_annotations[self.pfam_gr_mapping[feature]] = seq_col_data[ + feature + ] + else: + # Ignore it? + record.letter_annotations["GR:" + feature] = seq_col_data[feature] + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/AlignIO/__init__.py b/code/lib/Bio/AlignIO/__init__.py new file mode 100644 index 0000000..fe01f8f --- /dev/null +++ b/code/lib/Bio/AlignIO/__init__.py @@ -0,0 +1,480 @@ +# Copyright 2008-2018 by Peter Cock. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Multiple sequence alignment input/output as alignment objects. + +The Bio.AlignIO interface is deliberately very similar to Bio.SeqIO, and in +fact the two are connected internally. Both modules use the same set of file +format names (lower case strings). 
From the user's perspective, you can read +in a PHYLIP file containing one or more alignments using Bio.AlignIO, or you +can read in the sequences within these alignments using Bio.SeqIO. + +Bio.AlignIO is also documented at http://biopython.org/wiki/AlignIO and by +a whole chapter in our tutorial: + +* `HTML Tutorial`_ +* `PDF Tutorial`_ + +.. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html +.. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf + +Input +----- +For the typical special case when your file or handle contains one and only +one alignment, use the function Bio.AlignIO.read(). This takes an input file +handle (or in recent versions of Biopython a filename as a string), format +string and optional number of sequences per alignment. It will return a single +MultipleSeqAlignment object (or raise an exception if there isn't just one +alignment): + +>>> from Bio import AlignIO +>>> align = AlignIO.read("Phylip/interlaced.phy", "phylip") +>>> print(align) +Alignment with 3 rows and 384 columns +-----MKVILLFVLAVFTVFVSS---------------RGIPPE...I-- CYS1_DICDI +MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTL...VAA ALEU_HORVU +------MWATLPLLCAGAWLLGV--------PVCGAAELSVNSL...PLV CATH_HUMAN + +For the general case, when the handle could contain any number of alignments, +use the function Bio.AlignIO.parse(...) which takes the same arguments, but +returns an iterator giving MultipleSeqAlignment objects (typically used in a +for loop). If you want random access to the alignments by number, turn this +into a list: + +>>> from Bio import AlignIO +>>> alignments = list(AlignIO.parse("Emboss/needle.txt", "emboss")) +>>> print(alignments[2]) +Alignment with 2 rows and 120 columns +-KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKER...--- ref_rec +LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHP...HKE gi|94967506|receiver + +Most alignment file formats can be concatenated so as to hold as many +different multiple sequence alignments as possible. 
One common example +is the output of the tool seqboot in the PHYLIP suite. Sometimes there +can be a file header and footer, as seen in the EMBOSS alignment output. + +Output +------ +Use the function Bio.AlignIO.write(...), which takes a complete set of +Alignment objects (either as a list, or an iterator), an output file handle +(or filename in recent versions of Biopython) and of course the file format:: + + from Bio import AlignIO + alignments = ... + count = AlignIO.write(alignments, "example.faa", "fasta") + +If using a handle make sure to close it to flush the data to the disk:: + + from Bio import AlignIO + alignments = ... + with open("example.faa", "w") as handle: + count = AlignIO.write(alignments, handle, "fasta") + +In general, you are expected to call this function once (with all your +alignments) and then close the file handle. However, for file formats +like PHYLIP where multiple alignments are stored sequentially (with no file +header and footer), then multiple calls to the write function should work as +expected when using handles. + +If you are using a filename, repeated calls to the write function will +overwrite the existing file each time. + +Conversion +---------- +The Bio.AlignIO.convert(...) function allows an easy interface for simple +alignment file format conversions. Additionally, it may use file format +specific optimisations so this should be the fastest way too. + +In general however, you can combine the Bio.AlignIO.parse(...) function with +the Bio.AlignIO.write(...) function for sequence file conversion. Using +generator expressions provides a memory efficient way to perform filtering or +other extra operations as part of the process. + +File Formats +------------ +When specifying the file format, use lowercase strings.
The same format +names are also used in Bio.SeqIO and include the following: + + - clustal - Output from Clustal W or X, see also the module Bio.Clustalw + which can be used to run the command line tool from Biopython. + - emboss - EMBOSS tools' "pairs" and "simple" alignment formats. + - fasta - The generic sequence file format where each record starts with + an identifier line starting with a ">" character, followed by + lines of sequence. + - fasta-m10 - For the pairwise alignments output by Bill Pearson's FASTA + tools when used with the -m 10 command line option for machine + readable output. + - ig - The IntelliGenetics file format, apparently the same as the + MASE alignment format. + - msf - The GCG MSF alignment format, originally from PileUp tool. + - nexus - Output from NEXUS, see also the module Bio.Nexus which can also + read any phylogenetic trees in these files. + - phylip - Interlaced PHYLIP, as used by the PHYLIP tools. + - phylip-sequential - Sequential PHYLIP. + - phylip-relaxed - PHYLIP like format allowing longer names. + - stockholm - A richly annotated alignment file format used by PFAM. + - mauve - Output from progressiveMauve/Mauve + +Note that while Bio.AlignIO can read all the above file formats, it cannot +write to all of them. + +You can also use any file format supported by Bio.SeqIO, such as "fasta" or +"ig" (which are listed above), PROVIDED the sequences in your file are all the +same length. +""" +# TODO +# - define policy on reading aligned sequences with gaps in +# (e.g. - and . characters) +# +# - Can we build the to_alignment(...) functionality +# into the generic Alignment class instead? +# +# - How best to handle unique/non unique record.id when writing. +# For most file formats reading such files is fine; The stockholm +# parser would fail. 
+# +# - MSF multiple alignment format, aka GCG, aka PileUp format (*.msf) +# http://www.bioperl.org/wiki/MSF_multiple_alignment_format +from Bio.Align import MultipleSeqAlignment +from Bio.File import as_handle + +from . import ClustalIO +from . import EmbossIO +from . import FastaIO +from . import MafIO +from . import MauveIO +from . import MsfIO +from . import NexusIO +from . import PhylipIO +from . import StockholmIO + +# Convention for format names is "mainname-subtype" in lower case. +# Please use the same names as BioPerl and EMBOSS where possible. + +_FormatToIterator = { # "fasta" is done via Bio.SeqIO + "clustal": ClustalIO.ClustalIterator, + "emboss": EmbossIO.EmbossIterator, + "fasta-m10": FastaIO.FastaM10Iterator, + "maf": MafIO.MafIterator, + "mauve": MauveIO.MauveIterator, + "msf": MsfIO.MsfIterator, + "nexus": NexusIO.NexusIterator, + "phylip": PhylipIO.PhylipIterator, + "phylip-sequential": PhylipIO.SequentialPhylipIterator, + "phylip-relaxed": PhylipIO.RelaxedPhylipIterator, + "stockholm": StockholmIO.StockholmIterator, +} + +_FormatToWriter = { # "fasta" is done via Bio.SeqIO + "clustal": ClustalIO.ClustalWriter, + "maf": MafIO.MafWriter, + "mauve": MauveIO.MauveWriter, + "nexus": NexusIO.NexusWriter, + "phylip": PhylipIO.PhylipWriter, + "phylip-sequential": PhylipIO.SequentialPhylipWriter, + "phylip-relaxed": PhylipIO.RelaxedPhylipWriter, + "stockholm": StockholmIO.StockholmWriter, +} + + +def write(alignments, handle, format): + """Write complete set of alignments to a file. + + Arguments: + - alignments - A list (or iterator) of MultipleSeqAlignment objects, + or a single alignment object. + - handle - File handle object to write to, or filename as string + (note older versions of Biopython only took a handle). + - format - lower case string describing the file format to write. + + You should close the handle after calling this function. + + Returns the number of alignments written (as an integer). 
+ """ + from Bio import SeqIO + + # Try and give helpful error messages: + if not isinstance(format, str): + raise TypeError("Need a string for the file format (lower case)") + if not format: + raise ValueError("Format required (lower case string)") + if format != format.lower(): + raise ValueError("Format string '%s' should be lower case" % format) + + if isinstance(alignments, MultipleSeqAlignment): + # This raised an exception in older versions of Biopython + alignments = [alignments] + + with as_handle(handle, "w") as fp: + # Map the file format to a writer class + if format in _FormatToWriter: + writer_class = _FormatToWriter[format] + count = writer_class(fp).write_file(alignments) + elif format in SeqIO._FormatToWriter: + # Exploit the existing SeqIO parser to do the dirty work! + # TODO - Can we make one call to SeqIO.write() and count the alignments? + count = 0 + for alignment in alignments: + if not isinstance(alignment, MultipleSeqAlignment): + raise TypeError( + "Expect a list or iterator of MultipleSeqAlignment " + "objects, got: %r" % alignment + ) + SeqIO.write(alignment, fp, format) + count += 1 + elif format in _FormatToIterator or format in SeqIO._FormatToIterator: + raise ValueError( + "Reading format '%s' is supported, but not writing" % format + ) + else: + raise ValueError("Unknown format '%s'" % format) + + if not isinstance(count, int): + raise RuntimeError( + "Internal error - the underlying %s " + "writer should have returned the alignment count, not %r" % (format, count) + ) + + return count + + +# This is a generator function! +def _SeqIO_to_alignment_iterator(handle, format, seq_count=None): + """Use Bio.SeqIO to create an MultipleSeqAlignment iterator (PRIVATE). + + Arguments: + - handle - handle to the file. + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. 
+ + If count is omitted (default) then all the sequences in the file are + combined into a single MultipleSeqAlignment. + """ + from Bio import SeqIO + + if format not in SeqIO._FormatToIterator: + raise ValueError("Unknown format '%s'" % format) + + if seq_count: + # Use the count to split the records into batches. + seq_record_iterator = SeqIO.parse(handle, format) + + records = [] + for record in seq_record_iterator: + records.append(record) + if len(records) == seq_count: + yield MultipleSeqAlignment(records) + records = [] + if records: + raise ValueError("Check seq_count argument, not enough sequences?") + else: + # Must assume that there is a single alignment using all + # the SeqRecord objects: + records = list(SeqIO.parse(handle, format)) + if records: + yield MultipleSeqAlignment(records) + + +def parse(handle, format, seq_count=None): + """Iterate over an alignment file as MultipleSeqAlignment objects. + + Arguments: + - handle - handle to the file, or the filename as a string + (note older versions of Biopython only took a handle). + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. + + If you have the file name in a string 'filename', use: + + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> for alignment in AlignIO.parse(filename, format): + ... print("Alignment of length %i" % alignment.get_alignment_length()) + Alignment of length 124 + Alignment of length 119 + Alignment of length 120 + Alignment of length 118 + Alignment of length 125 + + If you have a string 'data' containing the file contents, use:: + + from Bio import AlignIO + from io import StringIO + my_iterator = AlignIO.parse(StringIO(data), format) + + Use the Bio.AlignIO.read() function when you expect a single record only. 
+ """ + from Bio import SeqIO + + # Try and give helpful error messages: + if not isinstance(format, str): + raise TypeError("Need a string for the file format (lower case)") + if not format: + raise ValueError("Format required (lower case string)") + if format != format.lower(): + raise ValueError("Format string '%s' should be lower case" % format) + if seq_count is not None and not isinstance(seq_count, int): + raise TypeError("Need integer for seq_count (sequences per alignment)") + + with as_handle(handle) as fp: + # Map the file format to a sequence iterator: + if format in _FormatToIterator: + iterator_generator = _FormatToIterator[format] + i = iterator_generator(fp, seq_count) + + elif format in SeqIO._FormatToIterator: + # Exploit the existing SeqIO parser to the dirty work! + i = _SeqIO_to_alignment_iterator(fp, format, seq_count=seq_count) + else: + raise ValueError("Unknown format '%s'" % format) + + yield from i + + +def read(handle, format, seq_count=None): + """Turn an alignment file into a single MultipleSeqAlignment object. + + Arguments: + - handle - handle to the file, or the filename as a string + (note older versions of Biopython only took a handle). + - format - string describing the file format. + - seq_count - Optional integer, number of sequences expected in each + alignment. Recommended for fasta format files. + + If the handle contains no alignments, or more than one alignment, + an exception is raised. For example, using a PFAM/Stockholm file + containing one alignment: + + >>> from Bio import AlignIO + >>> filename = "Clustalw/protein.aln" + >>> format = "clustal" + >>> alignment = AlignIO.read(filename, format) + >>> print("Alignment of length %i" % alignment.get_alignment_length()) + Alignment of length 411 + + If however you want the first alignment from a file containing + multiple alignments this function would raise an exception. 
+ + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> alignment = AlignIO.read(filename, format) + Traceback (most recent call last): + ... + ValueError: More than one record found in handle + + Instead use: + + >>> from Bio import AlignIO + >>> filename = "Emboss/needle.txt" + >>> format = "emboss" + >>> alignment = next(AlignIO.parse(filename, format)) + >>> print("First alignment has length %i" % alignment.get_alignment_length()) + First alignment has length 124 + + You must use the Bio.AlignIO.parse() function if you want to read multiple + records from the handle. + """ + iterator = parse(handle, format, seq_count) + try: + alignment = next(iterator) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(iterator) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + if seq_count: + if len(alignment) != seq_count: + raise RuntimeError( + "More sequences found in alignment than specified in seq_count: %s." + % seq_count + ) + return alignment + + +def convert(in_file, in_format, out_file, out_format, molecule_type=None): + """Convert between two alignment files, returns number of alignments. + + Arguments: + - in_file - an input handle or filename + - in_format - input file format, lower case string + - out_file - an output handle or filename + - out_format - output file format, lower case string + - molecule_type - optional molecule type to apply, string containing + "DNA", "RNA" or "protein". + + **NOTE** - If you provide an output filename, it will be opened which will + overwrite any existing file without warning. This may happen even if the + conversion is aborted (e.g. an invalid out_format name is given). + + Some output formats require the molecule type be specified where this + cannot be determined by the parser.
For example, converting to FASTA, + Clustal, or PHYLIP format to NEXUS: + + >>> from io import StringIO + >>> from Bio import AlignIO + >>> handle = StringIO() + >>> AlignIO.convert("Phylip/horses.phy", "phylip", handle, "nexus", "DNA") + 1 + >>> print(handle.getvalue()) + #NEXUS + begin data; + dimensions ntax=10 nchar=40; + format datatype=dna missing=? gap=-; + matrix + Mesohippus AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + Hypohippus AAACCCCCCCAAAAAAAAACAAAAAAAAAAAAAAAAAAAA + Archaeohip CAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAA + Parahippus CAAACAACAACAAAAAAAACAAAAAAAAAAAAAAAAAAAA + Merychippu CCAACCACCACCCCACACCCAAAAAAAAAAAAAAAAAAAA + 'M. secundu' CCAACCACCACCCACACCCCAAAAAAAAAAAAAAAAAAAA + Nannipus CCAACCACAACCCCACACCCAAAAAAAAAAAAAAAAAAAA + Neohippari CCAACCCCCCCCCCACACCCAAAAAAAAAAAAAAAAAAAA + Calippus CCAACCACAACCCACACCCCAAAAAAAAAAAAAAAAAAAA + Pliohippus CCCACCCCCCCCCACACCCCAAAAAAAAAAAAAAAAAAAA + ; + end; + + """ + if molecule_type: + if not isinstance(molecule_type, str): + raise TypeError("Molecule type should be a string, not %r" % molecule_type) + elif ( + "DNA" in molecule_type + or "RNA" in molecule_type + or "protein" in molecule_type + ): + pass + else: + raise ValueError("Unexpected molecule type, %r" % molecule_type) + + # TODO - Add optimised versions of important conversions + # For now just off load the work to SeqIO parse/write + # Don't open the output file until we've checked the input is OK: + alignments = parse(in_file, in_format, None) + + if molecule_type: + # Edit the records on the fly to set molecule type + + def over_ride(alignment): + """Over-ride molecule in-place.""" + for record in alignment: + record.annotations["molecule_type"] = molecule_type + return alignment + + alignments = (over_ride(_) for _ in alignments) + return write(alignments, out_file, out_format) + + +if __name__ == "__main__": + from Bio._utils import run_doctest + + run_doctest() diff --git a/code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc 
b/code/lib/Bio/AlignIO/__pycache__/ClustalIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..967a61609fcf7469b5f1d01417d8f1233012bf8a GIT binary patch literal 5394 zcma)AO>7&-72cWs;c`Vul&t8-b(~FLC1zqXcH+d2;kr?rIB;vrZXCxBp@3L(R?!H48 z)C|=(&^K$Q>YJgpR;U$J-wN%uVy(!uw>41^_5)4WzPYK_9E^*?!PxN)jEBU~b*)nB z{Ns(FS)B`mrRdUCH*U9DO_{ihP3b<<2-|Vuh0nOncG7ACD4T2Uh08atzdLu?z3F<9 zaGS}pFWscs4CAWfyx(lQp7dRRqvbaeUtq#*$G)2^2eG@>6z$M=?*tykzPq36VmoRi zK{JZoqu`5Khi0$)>)6rWT(Icg1B1I>ln~*&O}XldK!Q_v&%GNY%Zl!j*J^=t?60@| zsNu(zsv-d7G`?12&?FbErVB3g2U^V#1|)B0!xwS^=VUtvd&9e~ zN7|sxxpx)EYJ*79k4(qLdjW6!G1Ig}OW7t9Y?aH21je$-?z5C_^BtHAU`GMd33hKJ z(Nnf!Y_cxDsYTO`UQ>` zxy_azxr;&QS0z-cb)LEc$8(pxJ3vj`(DyKEM*hI4JMMflI_|Hvl6wPKtUOP`XyFnf z(>lh<>Y0-r{!+D5>HF*~ys+m2gLQkGaU7>}ypMXzJvtj#{ttRK)(a8!2AtQA;&wd0 z`w~vDdl=0_=4PzEdj#E#Ih`Z!clcFCGd=X9%z4iX+x}ZpHf3#M$xrG7k<~H1lq_d@ zoXE_s-|=OP$dQfAC5a4fx08HP<{)y(B&M*CjW?QMdo8Ma5%l1xK$AIjLZ}DPVl(4` z$O_VLgzZZE_zt>Z1PvFa`P@d5(oi*tDpf3X)wE?up*?q#ED?|br_ z1ef@SXf$48HY;&k=WGOz!wftQcXXQ@1ZOsP@a1SRtC#c=W)0IZ4zYu*tXqt;gE}ba zohQE*$f}QUoJXj$>-ET6^Xqj8zMkI%=$GpC^|lxGW(xJXXg2C~`5ZQ~LST7s|{a+y<520V` z^@qg}1-}8#2txBmso6EIY3m;$kg4mU%QvQTo?;a1{@#bKe|}S2LIB5G5TjiioQqp* z<6kMK{faVzHJ7IbEBhlmKU3F9im8JOYHU^8V)2>YXnfT`$bA#%JU~~9y0olqCv}J> z1=F;kM#H@kg8ZtD6~CkPgtG~0?)3#z7r+qa)QTysJCqKod5kw1#-*f`4yC0{-YtGO z^kFH-OnHjSX_aGVpC`lj*4^z4#vKhBno*Z>W(Y_erT zma%4LutwP^l8G{`OULY4%pS}wH;4B9_-l4Zn4b~V?`q+sb4CAWp#QzJm<|o-e!KM- z(z2-`+9-cHt?Al14SWG5Tc_SYcSigrX=QL$zGceG-{={pJo2rcp*TF^8K8&V4?Lr< zTj=!nJ!^S)pLWbx(tdh=V8$P*5j=oA8L63#ojz(=X8cQq^{AJa@((#2a@_AFru?gd z_q+rdeXDC)lj z^{3K4%wE)c7Hy~FD^tQf2VVr0>2w^o;nSVUHBYM4)psL*BiYLf^SX~}wmcoG&esaP zG_hu1d*|vK7p~4+oD(Nsdim5Vubw{hnxxW4meA}{LA0x*=}cc} zwnL#Px6}zJojU7)+VRKD9CL4bd*z{vMC>-01N|X8-Z}aB6+v_dM|te3i=6{EAg*4$ z+hZ-iaZ|E$>XM+Uaxp-TM}@W9Ty&8I`tk?v+Yrukv+;8-(%8KknjoF&t2koVY^dxN zl5&rqcV~mMw{CS_x!7!@5T6a)Je`D?qrBiAWU0qcE@L@rjeMg(9^^74LS!&nOfRMC zsrJF41>Qy0l4uETSy&Suq5V{U4~617dn+*v+ZpffTN%cH{L`s@yDtgAN 
zCo&dfEPf=7dssZTpG`YuB%Gq zydEIqnhlP*gF*yl6V(ZQI7v}b(pH&~lp1D*dTw`_En$D8xNP`((rjH)`5u)NCMt}e zg-Y0{%?qo#V-bE%{UBDM2yVE$l*ySs)Wh(AWGy6p@S&u z5)=|(m6_6umi*ciJK!PS zE?}8~|3?HEi@1iWWj(GH2ckg@;;*A*3**d!9fl&77io!3=O~1J0ujXWjG`h?cgoY$ zMTJD=N-Uqks8$|WSFgr-M9C`SSL*YC9#4`ha|@+}V_3{Wfnn$)Yyu?&XNL^~1;#Wh z;W3znl469FOe#hQ%V39i2_;8~qe#&m_B<=AQe~2laE^W%6l_2a;tL!;$;PqX2EA#N zFyo*=^PtNC$9k7pC|gkI7?Xfo>g9ZrqnyE7aArBTakaMzeH z!Fu9b!aj&I?ZX8=R$`Z&jbo*al8K%Y@Dz+YScOoL{|l5G9tw@V-B&Dm6jGBFYM!U& z1!~Sx^J8kLVBFU{OT%Mm7VyU8iBuw?NFvyLrKFbEii1Dilx@`}2V>NWmm;Ws5oK^J zDKJQ?vucIjpK;2sYs0{SW}nLozVkyE#Up4?UiKfoeCfZF CxqI>e literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/EmbossIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc69b07ad614ecbfd19fa4f7450f8e83329e0729 GIT binary patch literal 3863 zcmb7H&2JmW6`z^iC8-rfku?1wS2Z>Y6qto9+esR?Mv}%(?bbjd2ewnETNUh@GnCdM zcd41BB@xRUtn}6dIuu1ug>)&}d;fr*dMNq_6nkwhz7#zaJ^0k$%#spaw?Vp#ot=5_ z&HLVYZ*R`cxdhi+q}*OvBIIv+F}Z9^-ovAQh=CKIaLQSi_L$G8o?Bh3XZyC9+g+zu z@C#<{bc?-`U!vp(gco@6Gs26)-naZ2Ub;`3Gr>9~;6Qi- zCA?%aR9-LUgRbzNgaOvVo0ixaMC~MuBjsI$OkiD&y)U+*%?U2-czY1A6F>s`h%QUJXZB;XcA+>3}dAM#P22un(j5dsg7qv%rLM56fd(_ME_rl2A z*$fbyO|fSfwS#Wga9A=vMH0a`OSxy4#$H9*XqjtmB%!$)7-#Ujk4Js#5R#I|WJEDi zPPa<(d`b`55gDSwLrMN#SWW^*E;RaCGMn1_^!wze4@NU58Xe2Pz9~5~+&N1OPc6^z z)N)i=^Gcn=@`RYP45x8oClM8SU2oLpuw4<>;X$<7MN6>iz7Ni9} z^ELt9tfBkOSJ2HGy0W4B(^t@y4c(lf`!bn3vA{^119lwKZT^s?wvO`wlPsz5%07kF zh?jvh0GT%s@^~Jw8Bo28ESQJwmYxG&O>GV7ursjbDdn{dn)|0gMlNDL%dJuQfbD*f zvIARw2^>ST>RUEezeeOSH<8byLQVAN0I!cTVj5n-?m6r}1}2v^@Ld(TQ^mWEcRgz45bm5KQ_$C6*l8VD z>RXAM)U*ZK*8jFAEh7^NDDS0ZcuC8D1TUS|tBH@&`P50v3k14Y>Wj&WYrnzU+8a4W zXBJ?yh?^Jh4v^L0g`O zZf9P`LEHJ99voPwkjGX-9I`;t+BISkWKlim{__5_#Bl!<+|Ry@o#Jk^10U8Yj%Ad& zbB&D3WAEAWS2}+p5-sAKK`xjG+VXc%8NAP>2vEN za=^AfLDuUyX{vvk?{|{lj(kmLGjzt80voV~oNB(F&U0k;somR29g%eR8P1yTnCz>8 zvW9qBX72;+J#W6BQ-|f`RMIfG`E;It<8w<#@xa-B3+Kt{^b}5k#m{vC`R1_sk*<^G 
zAdkdua#95H8la2dCQkOQSX~=leD$5LcS(KTKc}xg%iq`inkl4$x74I^XN9haHj>S( z6mnF`Fxh)fFZ(X)N7xM{mcET*uY5*J8BNg?a_$ylXm|yt@H$MxLF970+}veoik9U%55ud%}o| zUCj<`WDso#?-Dfiwxw zlr4wX-DNdg`@i_C(IwVd@p^y zYbb6>iAF;ASh|ro_M>-cDm;i44?7(pbz_-F`+J`5BHQr*&VD4&KAPQ;r=9;qfLV-r zXRP9ClVvRA*(?{TEyKQUBGEO?22m6zfoUn5XR<<`o>{TAiMB?_tfbo;?b&SZeiHZd z)(Bg(f>a6EWRB{0!z6RM@eatH9B746l3CC)tK3j%95s7mWagL{X9eAQMm%#m-8e`x z_eVi@kh{S@J zrTt3osaD+4AB*xUk;(kYIr77yDn1Yt;2W^IYRjD|iqVw;PxWwJ_Tj>%Xb zUs?|Fiy8@IWKGM32f!e+0L(0HA`~~@$?u803WDcr?_Xd47(P+!kHg;8?_RlbZT&_R zpx5TX8|xx^qSlkn)%6d;N9zjR=B0kn-U>E^T1TIM`9}WVae3UQ-d*kQWu;b2pEa%4 z1#nmYz(81mxzx1_w7_g?vkEOyo0jxk->hyoXqCEnss?wCUZ7`aT|+Dv^OL7xmBFb5 zNXaVGGGmm{idE5EKv|*Za4%XVO<~pb$jW#w&@!YoK+9;^T45FDf_jlIX$Y$ucmbnp zRjn6c25M`01sjqWV`Wx?juoo_juq%oH9FKSn>J|0VW7J}hi6~x+gB$ZX%>ukf}Uu# zGPl*r|En-Bw_4kSpd00wVyngDcB>`z5hdTygO1eH@VZ_WFlY*;Cnd*bHZ!obrYo-j zC$c--99zX5WNBwYXi literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/FastaIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..590a8633782e7efdc114dcaf00c814dc29b89ceb GIT binary patch literal 7807 zcmbtZO>7)TcJBX~=^0X#EX$JYcw4k6a{_Wm$?{sJWo0RgjM`T^~!i zP7Ay6xZEyr)N*~kXuIr$;u`9Q3zi{hwk_PQm%IN6!{4n--!>xPH z&Xx!9__5;)n|lq9^HfN#ki3rzz2QSxIEkhkJF}SI@ti2cMhAyPi!|N(mLGZ`Y*L^G)aCqEnBWO*dptfOnm!MUp=%B^G(YS-o=~xDO#BT5Qt`J?_^t z&6sw^8H~8U>4$C*?ApwG=milS0ho3}#}B3Mq#WVch}5{&IL>{HjB5i}8$17v2nU-g zQHmzjqVgfBC)6X;DiCI#8w}NejsgM{bN0@bU*B@-;KU6?Y80`U!#@Z!8=`pS=);Ewrh0FZ4=*Staml6Y4CrE4VLRFG}BcZ#wl>}mD<&)|JUFa zP9yKuu)UQ>F?Z|nNKOv-Hlan%?A^7SpDf>BA$4-_`N#`eWMP?o4^6EJ3zX*Phnr9_ z=4JTzBd8gy8yW%nZ5-(HMmantF-JzxYzw$1pNm+o2*yGYxG^~$9CbK<#2+{tE**b8 z!bw46W2iaIGjdvy@TCiqRKOj*e)QehRGFIYuCwJn1dmQ5aKmldL?!^S743wQL|f2s z0L!HBd8fYRlFU2~j|82EzGCq?=VnSNgw=5D%!8VSyMvwhu+YsHuK}(li*V3V)90KG zxM(mf-)%%e5be;*BwPnvwF4L=89*JjIWQA9&g8xtNrA!DNLQV{0Z6%Og0zX1cSXAxr!P7u{9|(gEGPvygegN^3VcQ}a@*tc_Ss 
zmGV`rCeET!j@fESJ*`fur}QcHq&BI(rfT?`()df5T^)KZ;;)3~4Lst%OoSTnSxoEc zOz#=Y=$XvySJybWx^`FJ)}E-M6dPZueH~+`B%}axB$Na6rxGdxx-21++1q)%bzSysjNg)w z1?Urkx`g{z1d(*)n&6gAZWG2o_X}wIeUsYpT@EDqf8ZaIc%t!JeMo*A)WmP(swD)3 zscdw1Y#@_-SUcHK9nJb zda$oQy2^;k2~g4bZ*lr2WTW%XGCAtv^njX_QsId%aQ2zp3i}E;g|S zhd*E@M_h>Sh7|HcS-uP$lWXi0S>O|cFS1wI>2GL{gVWI2Dbk`Fh!)nh-b8OQY%z`5 zfz>;P?=wNJneR=(LXYhi*x7FBl+v9#K)SMffxY@1rPmHq`cw(e8%k_%+gSPMD(qZE zkNpWz`5IR9mG-$3YM&|Cfq@+u{FmLyzTTa} z&S$$f_;(n~$uW}G>lh!GSNf*O&-O7+UgaI-F==xuJCk4PTf=cnj_0wG$!~RwG|qk` zqpzZz(Y7_7dKgDa_u-hvfO21D=dgo7`k-lv@pr&@W2F7SI1h}UvKe_QCjVE=6JIKu zIZy|F4_Fs~b)LPMVg2V2>&>HB4>}JzpTcJ%#+!9}aQX6zp%@qGipO2V%ERbw6uu(^ zd_jPgW+8)+j&d*jQ&LHJ${+xE{yi~BU$R_llxE7PAYBy+*(P^!&r=jqN5+9*?vJN8 za`i#BM1kp%j0q9hhv2VC2?6tv9|j0xHn0yFtBm%GkP|VQ9Nh6X=w6nrZN+hGX<u`WzXN?^Zn4UO-HTi~~_m%SAZ6l*Y!yhVyAVy<-@S6B>|A+!>C)xPm*>%3N#B+!Z9Q;>6fR|$AxqPmL5A~!M7tt0-r4dn zAeY=j+yeQ!-`E|TO_p{dfg8_A(UsxtAoM&OWG;m)B8u8H}DTqt7Gwh7n6nAc%lk(D#61@l+c&J(+c$;<0&in5>mwJ}nNR%}$Th9Pe);)tvFCof*fiAP*Ufq|(XRa{188sZLYUM|``7*&179MmX}H zD9@r`ffp(WC+DDs5)R!aQh}`FGpr()ITDq_h!Ad4($Pi@W&;6!nnPtJ4A_FyF+WU? 
z4^gS4k|heM5@!X1c`0r0k=sPxI|sc-4u$ZXRCEhA3W}t$L@^zegc=dD1WvY$@(3)2 z!D^O~Qq0Ns+)>el5&+_K!;R}(P)fH>yOhcmNlLU%S{Fc(hH{3?R}eLNlPpZ;B2+D* z)SYy?MY%|7ly5EKv}4Mph`&KR4gpdUnr*rsw82P4 zAFKVM4DWJc5pYhz2$GqGvIH(3KsqxHw0-?R>8Y{Nw}G$qG?Xh$1mHY2`xXinC@7#{ zpmE-}dU}Y!uy27D5`=UHt&Vw}{|-D@UG?u3mh0PS^Zg=PBm`*f6c*XMqw}G`c`=rfsgg&1bYP+>FI3dw4CAW z%wVQ#?oW2DArIgj!Q0usA$iC>$3u>d^N?fXJme$~KlF2BJpBF;59v(G!w)ay;TXy{ z27C2q8ZPtKRy(h+%3=fj20Sbo+^#djVf193fU1Rti`JEc%pVa@S@m9bz!WQ@@F;?SaLcgTP{RNGkOzQFLpP@l9Q-* zr$Fu+?pC5oPKKyyFGcN4+NuAqkc-5trX*WEHYhmOQZiKwwFrP^Bhh@87=)b^nJ4N< z$0J0xL&y<(ewdh&vocaX zyjh;&6ZlFdm*wq)C1lAU(Q$7iCJO1O?UF@u%A@(~gle^0RP*6v!kmnmQ=z|Fz%`Q= zHmdm)j9PMu#G+Dm$T(SlIajs3hg6#2XV6X52Z`ED(Q@g{zKdlVa-TGmKQ^Qd~(s?Ldl!6v7cEb_%BS(v!`w(+*(|4?Gz=ZCUEzCy3+1 zM%xcqZA-Kmx?&xTqUKSIDB;gW5yD1k!ZHA)CEztl+OA8eVd~fP@q1Ax>0L4-ipGz@@J1vVb{2sI%jVCLRRJKQGg0A z?o0YvUOc z>R8XzHG?e5yj^6l%lR;sjO?A2E9tgci~r0`#Syt4Lr?W2|D8!KyrvE^Gg|9o{Yc09#P z$0T<3(9rGW`yYH%of^mb;Lhqj{w{GxHOJVi=1DKfyCw#mY@%;_yNQ{)l8=BVw<1re zT;lUIP@#sl!xyQ!NX;c`-l7J-NhYP4&zb}_^5PVAeV9IkH(L_F@@D= z{3KQ%r-U&$oD<+`7W3nfhS3tnEzq*n^IBdl83rh5SP?bWLn~r^V3U(dpDFNLQcuap zMlBYP##bS!vbtI>B)M9RMfF;(T7bf(KhnyppqiupJ@0|MM5+^6Z%yVB>Fx3ood;1G bv%aATCi$!BuV&X?28N(Z2prR&o}d07GY*(K literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/Interfaces.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50cee59ccc6718dac9a3012e69c52a7fdba2538a GIT binary patch literal 5379 zcmds4&2JmW72jP_5=BdP+{A9u!p+CpYxv}hV7C@d##LLj9D$AJ+-#bUHG6xUtu z(lbj*BN^wOf20zF5U0{tg?>R-WYPsufx!l(Y;4EIA4UE3%c6d7W6IkWTi-uu1Z zdvkkzz3HI*!dVOdb_8?`@tBRY^jpp5 z$4M-;X5)0kxRUHhMOl;_FwPQr#D-jHsTgKaKmO4Ix1+Q*-(<_o)BSW53x*AvSd?Y` zNY8g|G1ku5P^E{Fkecmtd=6u-H5ysUGBsvA5llyeVI1|wG&MQUt!4=Mi&DeCTd4Y{ zcu1$~LNlIlA@Qowhuf`NO>~#UifDZ1beDw(T{LG`%`pcu$=WzP&r&t{`|aWkMwtCk z6blmgH={Us1W^)YL15<0m*koq$vAGAzD`Kmph9mC^<7l` z5*|}`>IBZz!_yTmLjg1t6dLktv$~dXPm#U5t6Dw^%t8L?* z&9~loODB}>jtLnyRWwsK)uI9 
zsawPGtdRrpI-!d;aV{^rlkb?(Rex&b14BT~6nYo9Ttg!Rhn={eg2#Z#+T@S-tbY*P z6M3AKKmfRm5eQusOW!wiV%@8>rjo-Lrxk3UC}hlpRM8=t1`6a?B4s)y7lP3XdxD;N zqq!@Q?kCKwZvCKwl^j!2bbNRAGQidbLrIcciaScB%6AocpfP}bETKt?BcXy|3oT80 zaF!RV%?>-GHd@fQvsK&Fc?d^7A+>q@&(N#&TglQx_z&q1$59ce6cn+LzH-*F1N;P1-r-Ooq1i@PvV?nXL7!-Dx z?{@qZ10_C?K`?6ufeqsLzZL|KMm$dJmz5w8X&3}*87rG3D9SD>s3hN21WiSbc^OsH zNgK;`uX(+(ZhAv>6ooK*3srC8aVFY&qK4R77l^iaBli$1Ce}6p(AC+Mf*T%~RAKVh z7mBntzsPxUL7X)MN=__7uI1n7imG{#J`+(x|mPel^q zg9tH?2$aFCGmdNmsSQ!Z0obwP6}h4b-iw2N;XoR3ZHL8?&L|Hggi=N{PjhJb|3p2d zr?iti^Q2Hq8KR|h3YTdTj~POA$m19s4nmS9NBG}I0+XN4qslySGl8i{SxPmu%1HlQ zVMDFJ=yPDqn&mp@@G)yzWRtA-3z%H0@WTV+Ppxv;8OV}@8_2$bKU4Q}b7MI5@U98( zl!8Znak!hKAwsQyKhRAHE^NJlJocMLkBY2n*w346JXP%%iIM<9)!?7ZJ#{{J zP8}CM@Yp;4!PI@v`R(<`HT1gE+NtZ}b|kzvoyXpzZ#|%~9*v#ay!5N8k9@-!l`ur* z3#iQbf)WE^d(0wJ-cFC1#RV@dnqnn%CFmaNB7RzlDUk`R*SI}?=_A~ZheTV z$zkb{MOutJNfa`&0QfpIwBQ%Q#A%B%g>|p$s*&8nU&>hx=VD7*o#v@^$zvXZ#UiNJWEarOzGMRg z&O`Ej|Ea2O((;VwJrwG6)v0qo|M~y#zm%?xj+Qk1y`hy`PoHSof9FH;XX54qTpnI% zwpQ0{-8MRUUB_>xV{ByV8J&4%$K1%)vvQy9!Y|Y)hplD>=C>CM6=7z=;KU%%pSK#?aC9qUd5X+yNWl}$3}hJHWszoL~{{? 
zy4LQ^UG212y0>my-uC8Z&-Jb4o@@E5j`i|JbNOX!W4q(GH#?5iWU&pWD+M;1{#>ba z52Zdz-b*v`{$|g``x(o2yq4Qua%^j9*ZTPS;`JHJ^Lwu2Sxw7x+%|4{%cZY&;92c1 zi?q8d=y$o@aptUBzSZ_H#J1mDVkXA;XtmdIW>=hUZ^J41y#vpK(Rwr1hT}KwrjHi4 zTdP(R&Gfe2mSeS6-QGseLv;?U>H4M3p4axTh&TtNU-2N0 zJIn1Zs`0JkZgA_Yz;S!COHGfh9aullI@f8hJJy|Df3?@G$vO)CU`s96S;0hPg*|kA z)!Jxx+Z)Y}+B38j7eEIcr@Mkt_qrfI*$QkDo4}Zs+pgzZ$%e&)F2y^W%neJh-Lb9J z=I4&}@S%*+x^l(3`0$~$hb(L}b1pr6$f0=FBaj7l+QaT*jIt}vPIChXU?y$hH)ZT- z<59cQu~yn>oFm)-X`=>eWBE!$YvW+($w23<`>F?&aUHBL>NUIV&Fv1>KIuUXuat~9 z?GL28DJyDq+L*l8X@iQFdfQ#Q$>(Xqi*V*3aVxsmi19<+bq9v~4GjKRn z91YA>s~fgn&)9~Yd7{-#+qARz&DuFTkKdeK03$3$#gCfHs?Ff-tF4x^>61e}Z=_al zX$_|d%g7-}DV`E?s1^szKmKNLUB~5}L=tLi+8!AFzOE|R-LBp zxO2gar%!|bTAw?vhtqQ9lI3ApK>Js^ptVwP;}Vb}a>w(k#+-Gh#*{$1io4Zk2x@f3zzs^Zh*l9*t52n(E zof|IH;zse01&30$Bo2WV=qHlxNl5B z4#uS+3FUb-ey;eYpT&Ldv5wMNdu-o$tg}RDq;;yQPEu|q-_Kz4@_PkH(!yRb%_*S???kEI5RSHS*P^POr6Yox5}Q)+bl*&DZ9l zoRTtb9*gj${+rMKFaEyNXCvJUvS(&rzvz9bTan%f%rlq1A3mOFw5U2aHZ zuJzj8n(2=6AqNS9Bk7LI!*bi{*q#uQO2(Ky+E6>JioT63p?xh=)pI(ojF~ZvD((#3 z!d2DFMsRFE9tUct^pQm~1zbWOzrc+T3i0*FhBx8kU_aKs(Ea#uhx!feF>-8k-E=Sd zIOu%4d&ZJr~Gh;+S*`Uy+_@&sUc!pg4peLDLn-V zkS?2wE@vPix_%Q<7%vlIB`in=6r7w-;0me@W}vuX%K3P#d88L{d6P(1u_T|B`#QJn zvHnwI%Uo0(sHVFX-h)vYa~C6h=MXi0S$x-Xd@k90b+*NZb6PMnpch4K=}6|@#er&e zy!YnPigJ{KGgv2uasXr@GMfRIw=Co(si9?z|J<%~KfwTycf?YNXnk-mF)wxxF| z&Skm|N(leV;n)PT-(w1dF!RVl5^Q);<*M%k({KB6$&_WducA&Zt2jn6-I>T_ zBlQCKg4gJ6`z{ys&;mqbhqm+kDC511M9Y=*f-$8R@`i32IepTs=p|#qFpUy^%lH=@ z|JpTLsO6(VqtV4aHyTl?(NORj_vJ=oYrENr-{cz&yVq(o+$x90X}E7Np{N{6$mU#X zc$aW_WJ?8%sgNy9OK!@v*h&#si^Z;ibZwB$3Rna#^Qo~jw_C<}02AtGV-}=?1-%}# zAfum+S&;EmcmHdc-2-;n*KvOh_nCcd&)nDgIfy6-GBCT0yXxl@vkP-}?zm>>AtD32 z(9d_L;+!IKe$Or;$7H^rNB+ohj6m+nybD$CDDUmcADR6E%8VV?`bE1M7T0s=|72)- ze}=xs$yh_vo;a?p75XL2YGh~5FNVdi^wN@992uHg{D2#FI zukLHM0roipCR_~7y^>${N5dk<06cbFQ>kr)1^dWPvSg**-GdK;vWJy3(U;C~d3$~ZB4ms?{Ysb#D~)Pc 
z`GlfiXwSM8%=&nkr6}Osx2n?13G`yPui7twDWp3C7&86046NH2`WZt%*}ZZ5nk^P&i?3Hy}&5=7FN5?T-+s!fjEa9`(>SG}WcRk2pn5F}Rc7qfG1LXK_x+idI+@naI5efz05(Z^$Hi3ev z_oc#fHWgHgv5SiLCZIUM$ulxIcY+C9aVd-3BTRTOpOL{oBZmjTnP%>BCPX*x3rL>j zkVN`iaN#CZE6eWr7dBz8=!x>H^iH595i8V!(?JQhMVqHs-{iES5oH6}Bmi?yO1tey zcOrf3Oa1ghF#Gk>+wP_&0`Al0(itz(oAYIfi3f>&`CGW+LHJ*_McC z4@2=dcSyEsAID)&vw>_$`ddI&AUVVR4qj7_%X*ugwvPdRY5FOR1$>i5vdUEy6sK(7Frca zHlxcT;mog&C1ntczN25m9b}6}BZlKEzhPd*e=Wz9vq!Yp*1CO#TY_1=fcDv=* zsserMMQm$B^6Mk*Zll>)!v1$0wQ&H=M8N>t+>fZlv>{8QNatH@wU7`-l!101WxCFz zCPhuck+1P`eg#pF46R%r#l7D}qUBA)gxE0+Loe$ThB+W<`~#-W znPq?mTp81ZcqGcs6fz~<yc2W9se$jD|8U{o}Fv0GH&!) z$*$OAct2uS?Q#5;?GyG9dlL4{QTwPpg`A3g%$~;Yn0*`+`$AOUl67roC-_tO5i_B} z*rKB=CU_6vugy}lC^ju`3)JoWmPHF8ZCNzoh_P{LyWR1vZLln>ba$1(5S#(l7#sFTr%T)8YR~qp^PbamTdU_S7--Hz4-?91Ire!P*Q6m> z0by4p7i^JA`Nv<9@pW9@kB~q_X$^pBq`GgcW%f)-q1W!?A98b{fi&+IsJBA=XpN$j zfL6S(yPx_adu84se-t`QWgq9iG1i?rlwVc({y6m@X=Ot3grFwt3Z{)|bb3upjLV@` zldMo#4QUTqz|kISz$z${b+&{Zjy0rcgS#OaMi1v{vx!rlZ!UT21{OrMmbR}ERn9dHsu0JXTWYFr;t)FY#+7n1SSD+lL&h*c7m zLfP^gjSN(HkIILJ)0Q))aN$Ja4D&>nGQoH<_PL}+%E>9oh(?7rcF@jKefJFl%QYR4 z5qnJNV*`o++JL|Ngo-!p476c01dy?_D4CnmR$z+i8vuf^h5**7+POjPe#S3Q7pCpQ zT?;dPvwKoWm7oO6FfVP%<9^@&bbC?i(mM7Usfy(sr0% z#4+>7*CucsSvJ;7CcwcC4FsZ+|4N zi-!0v?tf?g?!3tLE7sZADtJ~9hdT>02(HMp<`mBy|U_Cz?s4Y>7+O?)H9wi)V4SmmK@mWg+a^RW)QV zKJsC^y}S&QIQ>gF%bbAq8&{yj)`GXbaq|a{-PvIFj@$bjwrha6gu4p^LI(B=jn51m zF=xEsjc?r1FxNOM&@y=C<1RcTZEw}FF95I7Vm;_}Gag4^#a&##{qe%RbLVSnn|sU4 zFnGvD2Xm>d1uGC^ju{M>Q1py9XIa;Qq_;OAqA0`O4=l_P`@5kwc=7umf#@wDHOO-NjqC7Zi$Wunw`7`;bW! 
zNtA7S;M1N*wCtTm zOp#G$!&`|836a!C{Eba=T(bXq(O!zhNxeM0{Podzks7i_QO0rILkzcmdtj!a>u8}gzsxgcXc`vV#Vi7&ft(CKbX6IcOE8v>)zFCAI=Z4?{n11?#+LCZzg8%ysy0zT)wriIDhw^ zb!*|??L)=mk~Qm-s~>(mzi6FH+5QaK;yc0nzfRrZBXf4fn!SVuuKzj>9OA7r*4Z=O z+1fkqCa4$E=hv7=KvYAhCK{@e3+V=w*Sk-p3HovM{QV_-N) z+B0&hgY!JxdfX}`LD!yvOe7~Btoi#h@(+vkc05Mml-8}A*23+3*8Hcp7Vj-u7^H%U z*6oG331Q|OJ*r;$B~?YyeST$m^(%VDpr2nEFHrT}kMGH&=vg(0&aDA3&F%cQ@5sOV70NKlG z_>N1x8k)X=BK472NWnF}8|46EHrl@E{;`3%o=>Dg8Afd=__5YLiflQO4F*L-*`+2# z++kr*g(dg^wr^4r+(W+_uy+pJo17nboP6Ab&YPbApFag{8o${JN4HHj%;0doK9yA*4iVfL72n;0(y z6g_A_@a8dX-{2h>u}S+A%$~0&ES$61ssMM7fFYNIzqsr8+b+W!M0@WNb1S`^XwGRO zx<^@ThB)%Tt@Z=44Ss{l;5gK31<@lw$%_AYn4ixq{B8L55Ie$VNC=o>Oq_#&w1mO8Ekt$U zVEEF+z@4nLRxa=}G53>NpGk6zceH9eBjG$F-9Gy+lJ+f#g=e(Tx4(ygFaHN=hp`S?zF@qf%ReKS902w~(UdO8 zqf?+xWI?*H!87gb3Sv_1TpwJK(E;F*aLx&z%Z8Z``J9vA+|Eo$C)q3gJZu~}aM=%G zn*di_)bMP^CHb4%dJB9ni@hxb|1jozY{o7aw_X!hjsoLD%<()7f z9^hJGXlF*S_ziX9y?V95C-JE?yw&!yUmuek`UK*PqC!Y02TL)~4sS|>HV*Bv`(yO? z*W6o@mX@nPxuY4Is+}llRMx;LhD~cRPfE9$5QHp1jz6t)O0?2&s%toM(6;2z>A_8G z3~arKvuh?XK=h<36a@omOTx2o8pUWV(9%jw0YgU-*2EQb!yqkZ-TfYJ-7XTiTo`}j z{!Jza86ilSc2(8UCm8#HW?#)(nyz0>dEjd7xzX0A4)qv4@)?3|NKFwKcf87VR_jI^WxJsw*InP3G*nf76a%Hll`651a1?LHm$PcMpsjMf#N)j6m-n@;4vu^dM}DM9_OvK2gBY-+1}uu~ z0g)+JZU^2k;sarZ2ojC0O+zt7ii5xp@jUPCc~Go}z#?%5!^Z{RFe0|)+|7`5VYH-f z#Jj=S@4^h;g^f!cW#xGt>hQc&_s(11s>I;JUf5J&s}Ly(5|T0pkOy9R&I-fG(?Ddn z&1e^FIRgoD9^*OCG|yKeuXsQ3Hgj3x7C8$qoxr&m=B+k6%d_++NQgRWLKq_6wAa0y z7X6wyZ^Uh_TtIKLWFT=M`s|Y1djyRHk+WDxiKxacJdGH~tYV+uh2hYu*XAL6chmS_ z?6z8XM1wP6N&MPPh+QZ$e8(xM(55%%cj%c)y8&uDgH1YP)9x!E?=p;=C=x?GyscU9 zT9*Q2#tL5lN@eK{n*`C}tROVhIwzZRg>9l!{MHtNpT*Y88_;@hEx=SA(+CPAs`FP2 znz4cjjt$*hA=s-6HvlmKSFX+9yFWj_0CR@LJpA0g%QVSX^ubpsUBk;8^LMRlKX_iZ zR~N6pGXRgl*&!?HEL!{^q0?bNix;(tflt}J1E#1{-kQeV%zlRg+~7z-932E;uPNDve- z3L@?kxa)~%z)DCl`m>)^;XQU1$jYX8@e6h?%uYgY1@kW2`JWgoD7Prz&L9BCWB^Vd zdMo|UVNRa%h%bg0x{q+7ou9G<#Ma;G!!NXx6H)S4eFQJ9mDnD7`PuG=VG(|)g8OI4 z9kCHYMd2ee_}`d8g~GJ^3c{n}w-B<-A4{IYVxk|%qOUAIC@U6z_-2Pai4Urrh(646 
z1mlL-LXYF<5#d(2Lr50Fm`1}2M9~q5qA>`^3e2K~oi}9Ua|qRX6~R`c;h25om?olq zvR}0)fk81NX#UlAF|LCtM*UqtkLEY!=eWOGcWW{?%ctez7U?2wYUZV zFdiNQyfEDV7M=)?h1j3R0FI!6@o-Y|{~Gzn!%4m&W#V0dIlOynZ2c)#-`M(BfC7_y z=00q6cb2&V! zR^+oe%x0>83hz&$%@?K3&(Y?IIAgK{zdq)7rPK7V+RRQ`=VTCbz`-PvF1YKzF-(haK@FfA(C!74ZBUXAK{E|=1 z(nu>XbAiVs9L0vNAg{y7k~FavnB z30_NB5XZn-_!AaRof=Y;aX^UX5b52(VO3kMX$PaOgB4k1PCCX1;cze^V{rtNgMTOFt8!p$5+_ainY7 zPcvKPt)UMC2%qASMg=+gIN$E4$UTHu+#2%!Q8HY7AMmS&n|xD{j!#W99v?O1HdnFK z*YT_W!QgWRwR%YaB!dJQpmbzS zk$^6ge3W`c95^YzA{}yILqQEJ5X>(0!0hBJ*7hNrd2IWQD+)RvfM`XPVmCg013{%? zc&3#Rcp7MaaF$~GGtG6ug?q|wJ=`R;7>|{dxdImDA@g#&86@VESQv#bn?ZX41C+!* zfln_?*Hrh(27%^yc#F){NRy>AUZ?r<$r@2{}c&~ncEvm z7qOVf(G5acm1ZGLNIY@jd(N(kZT&r#dzr~=Ouok}@&C-NNV68sX08g|W6 z_N6~yp+96o3#Sd@pjd)kR`nW{CHkNi9$(OD5s*f+tI-;+r6{uGk z@fsWVg#O%r#_&iyAG&puz#$JuJzDzsO|TrVLf4!^9Az!#0cK?G+sUA1C=pI!a~ zc8M038>$tyC`(%(Q#w~8WlV?dazmVQo3!-{G^LHhx3ob5hDqLWwJ7D+WB8($}hXKzJp4Yc$>hPw^NGuE8UO<2@WssY)D}6=!xGmsH`L zC_01l@k}W4yW1N}FpgpHF)Je<AJ>b>Bn@_bn#;oYK9_q{n2P$p(`ENmK@q_1i5KX{@XiKjurZNRXkp|ANWCWb(&M z2FoRPbga3bA^AVJ1WFV@W`$#gZx`+sE|sh0t<8 literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/MauveIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a01d821114fa52f7493e71b915c1dcd8164d41b GIT binary patch literal 8463 zcmcIp%X1q?dY=a#g8}#yMM<{fou#ZYAq$j9y;&01G$qnZSrj92EgPaTJ46r2A%Owd zGZ0B=a50_Q%X&+PR1T>e$m3RR<(NMpe?ks9%waFt9PC3bx#^PpzHYpzhvQ0lhn$}7 zufP8K>#x68H$EE}FcmyElw9fQHAVRkYIL3iDxcyFeusi7MTMzMtEfd4U%jGL^`fp4 zEKx~Rlf|T{Co8FHx|l{i#nKg{nki;P-9X(enyTWeM_HCRR*N}NGf^90+2SC}v4L-u z;?R=92HDWJ3LA3GL%leR_Anbkd&JG4J;JodN`BP&byiV6^J-J`6>rmDerN@adcDR& zt6byO#D2BxOjxx>SZ@HrYgOxk(|GP)Ugfn-?goMP+!cT+)BL*DupI7M?ta}Zg)Xzg zn$-wgE8Ow|t6F1?ifcXh9JF1lf4*|VFNI#s53EVdh+dO=AG^EYrPt?`tpiN3=lCHJ z-mCE)i+LO~RSv8@FWeGzH=TMNGY9T&!}UvUkOyCjpz7{B)q2IfWR)SBVj2VQ|1I-c+On}T$?R;kqX(3+h!O$*O?5Ux7mmo5*$y_Cze>;dy){^>;! 
z$~aSH=Qa3!Kja6QH?Rf4%%T&7^Ct()2tpfg5So#MW?sjBC!wo?Q0WaqS0$mVCkb7Z zOYG<9GpDq(QS)6vC*0z$J4HG&wXspERRS@5&YGURQn+G$Vs%$u_?aIu&(BP=@RuVx zXU)uBzczidaQ*g;TaZh4BiGi}=I7@Z=GPV$7S0Fy2A8xBbcscQVL4K) z6^mWN@V2I0oxOeS=8eMil`A)ISOu%An6!E#CHq@QXmP?KZHpA zf)TJu90Cd;xRKKlo5U?ij>;vFhwM;bO;krgBxrynM&r?>8ENIT9$?WpjFCkD-&#o9 zMx4t<5q*;Dnh?z0ws@d6w{P4o+_*A*y;{i5Mj0cGcpwJxJJR*37jl2uYPj)t6rO9XjwlAooz*a*=qP!pmT4S^?xH23tA z!n3>p_)gWGo4S~9KKnTcJ^kDB&o1Vz^MW*Y;o~~w@61^*0(aLYKx_7e;IUPB0p?oh z5ZZ=H8A-Fi0Erik&B}qjg`~4pBH_@J!kfgqfH$~>B2tcUcsf?s^_JSwT6(C4TBHG{ z9jk}hy4Fgxl2L-`x0F^QQlmtajP!B>ho40A-Xk|`aNnAw32FOa1!70@q)H<4G$^nM zvvy$X3~t{mdpJQ%u0C4+a(?Z8e(IGv`${cv;wi7k$)H~G!dS05^;n0K&!=LuH!RjX z7VDK-DNe}+1D__1bx7e43O@xRuO2MSuKzl~!D4;et4`lWa{V&uZ9%s`H))DnC3*A_Lcv-}=<%d)CgLuU5&2J0tHb`^^3Pl@M$5cb* zr%^8vp!4YMN-%?ptKg`r6g8$6_1`LG9A26Jtx-&{1WV$mnPeH3X2!Q_G4)#|p%l}Q ztr=$p3vCtsK*m?$6`k%uYftF!RCjA>QhJB*1|Ombl|yA)#inV$SLsw7DK?lusf*LA z-aNhR!zY7clUyrKZ51a7rWRN72?(}PcX@2sHcoM&ZN~%l9-&^v+VI8 z#Kt88w^EK14&Myo)XpA#$vSOY(6nu_H+&3S@v|tZ#3SjVq7AAkwK>+e+^No-C5;g9 z5F)(`c!NKp2$e`Zgt|uRk$MajwxP02-O>0&h_xN6-(Z#75^@a?y$wkO`z!%$yxYm6 z6iXgMF8de5G)rN8>Wp%z-BX^OZRzd)bfiPswQ+^;cGsUM`xBu-GXrb9Z5qmU1}2eU z#&;TUzIg&}1Z~_cK;|YkGu!-c=%Y-?==4q7W=%SXURy4QslZTRB)xqF4veK%cYP58 zaSFHE&k8mGbeyo8E6Vf+gghioeE|_y2{OXCtUEfaZMrn2H3_dsUr_R`E$2B|s$*@y zu68JZ_R*2&_C6XL2PZH|7hk(F=;L2NoD2lHzo3;Q-v_ziF-1{DNDp zhX-94)+E^9ZQzQ6tu@Erepw(~Msw`E^z0?~LLiUf7XlNSXUgTnc=wyyU2_E{rNc88 z#~=f{D{C8|&2TFoYgg=A*@lmm_aCwOrBi9R_c^a|eh#aNQxF)dxN%bKVVp#m-E`xO z>o=+tX*j-yro=@@jX;`K^Uz{w}8h#k79=sddk5dv+8V7kVVIlF5px1diChX~RSQ&Iy3E?TW zVLLlIKMR|ImcxqF?L8F!hST^4|=~=eC;cR z>A+9#r1%f8xK1reHKw0~_2Qd2r;u&6;08NzqH&@R!3KO`-YjnN{!YWl1g!jDBb}u> zaDu{O2`|})mjvDqZ9D;&cpbi8s{G3iXQG~_d3RD&4-MF~;GcNy=SlFECfJS!%sb5R zHI@N1!A!xq$+F+2S}B%mrG5AcqQeI82e0u4&R{FehN2YB8T^h7%NqZ4lt%l{*pci@ zEx5-9;jgkkPAN(q<=6-tJ=X9&Mc-Bm;|GogqZAuEBfOXYn;*p*ky!h);Sd|>XaYNZ zsKe8J)G{PBHjY(jkr5f}^cjWZDLvy$ZTEvG*dO@1GtJ9Sgnx57ehudNNaSa9^9Z*i 
z)B0ZgZ(isaFfKaDSvZx7wISnCfV_gzZfAyR>zIX4P%S|T5ZWQVeu)6TpyD!$VmfG)%ijK#PG!27_LyDqe7ATCRCeIX?Y_zR zMH(p_OV_*bj{*C8yumgK%3;WpXz(C|>S$sNHA5ZLG&P5E98kP^4y~M)Kwl2M4?3ofrpDA^^{m=F*YBT@KJ+|Be@-dDYty?5;hMLfF zDdU%%pe4NBhcb^(@d~fopTKP@NZcraqr{EU;AA`?*0tO|Y&1U$iO_xr%1(!=H4N@B|7!S2?)cp@5jbB4Xt{*E$3Ch&bYUAvleWG}9et5xD zE6Ma0oXI}0gVj|2M%m8L>d$!({H7tbnuU1`b5j1wCE5# zMCl{ssK7EXQ)Yjg8E^|5E%RWJdLsOI(#l3A%fRLSDKw)j!az34GIJ-v|AcQAX9w!f zq5r>n{SijqQJyHf|BPUg-7&$>pbRHp1NIeOBvTojIE*_AoQ|@c+ktDu>QOryhzv@w zA{|c7U<1cUz9OTQ@-Ib6;H-v2;5xrKa0sgW)0gV*ze7@>%QARP!V6d%lIuYDzp#<*Gi3x(!u-A38fK%ijv%D*mu>t} z1k@2f*KVDnRy1;SHX1%|^Nd{`iH2_y*5nBa|MqSQ;kocVS^-85va#>|Qs?v<^qn^NTC5)Vr_LIdWDN^3}+0<%tJ(mA;Fsoj~|*q;}m{ zB%L|MA$|izY;=z85{2?$8NDcQ#Y*%1V`s0M>>|`*8rNcOnGRZXB}Nm;aCWN~8bu1t zymHy)I7GLDVsOb4w_0@<$tUYOUpEIYpSU34_dDF>HOVeZfEVdJJ{#=Sr%cLu9xG~bIRfaJ_``fQ$ar{b2wau(*+18U!OlE z!|i$0X&mAyPPDs0F^dxE{3hlMieq)0k6mViuU>Lq%v2!#AOzR3S}kV8?K(rNm~B^? z6FRZJ;~vCG!Ao(tdm!i>K#&E{g{4$3ex0~T$xvU+p##0xV9=ELz{!9vf+jTEwtv95 z;Ga+^i0B65b3z{zaT_&i;SaG|Gj*IBaDqt4SCci3&J^fvzrYyQ>6Af{Kc}XEMg7BQ zouxRBbNe_b469R!{RUc^no5C^_7Px%$Q>{qJ))+K(g{VgbjmfF<0s-JPWxn*w!H$I zx&zkvT`K0N_!Sj@L&a~X=q+2+#AU)fI{p){xJ;mUp3peK@}pvOtZ}!P>Hhd3_PUs+ zHcU(CKyjeml3A)yjv~|%@w$w(EZ}tUz|$z=bg5RY<9>~=5aFNl8S^Btq^WbPe7pJ)eU3##s30-m8R+d literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/MsfIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41d6c6a5c6d36de60200e126a0c0978014d61076 GIT binary patch literal 5805 zcmai2%WvGsnP(N*6h%utvLs8gtntv3Xlza7mgk*#wca>>tr1x^Ms_@DXwas+*b=AN zWUI&?Nd$Wiv3E9mSU>@C2!b3WAPACwVX@eMV6S`PDL^hUFo!uC*q!}-MRrS>OfnM4 zuKM`BA5~w~kH*Js4S(-zV@>garu~T~gFgdEYXiK*1b4~k^AJwi0{#JPRz7zMl z-AE=*E0WHg8+V+08@FlPag&;DKiu}5mLK4yAkMBEJ6#!xUegoK(|sqwcOrTKoJU>9 z4TXBvqNW>^uq29tc-f{;ftz^JQ2>Z-Zad)i@bZoOw{PFNrReTG+PJZHYp9Z+5QZB% zQ8)3U&<*UUmvnnd*JJ%6>2LY?s1zaQ&TiBTgtIIC#B;XXE?6Xv7mC=4T8`&JZtU&! 
zys+sxyW2jdO~SXIdQQiWV?W$-gqye|I`ISH#m=QCPd<7bbw~nB!*@KOcI{NQxJ%AUHk5x(ipkNhF)Z zYVd~~E{bCT15c|n6bKirt_xl_gf5KdTD>AT626k3x)-+wOwx0M6n828Nec&Aq6E~| znQY*3`62`$X$AyqREaOU`HPkRNG%9*z;L57C*{U>T2ViT*RxDXFlf z_w|e)8b{h;G2-4gGULz^r;b>^k{MY=jLm9iH8I}S=6UcK+MBOCNO?3&J9NkgHgtTZxOC|brx&pzwomf#iKu#jU86R{1Mya{qb^k zPR*+2Y(dRl=}%jR!yN`ot~@3m~auunMIxPsnJ#>9mI zG_wwYf5e!UjUJA}<5lAQj46n%Ag`Q&oC2hpO&mkQG01p*5VHfe6HrDR@c8$De@Eqn znY@xAJCxVHD8~P$B@_RuwV3#>SbT0}qccAo%&5o|v$>ixKAU4IQV^D#1yhV-#ZU$boqP;c|O2C(08_#t-8wp?;UHTvj0q0a5Ln8WM>Z4 zTh$x#pJw|L@bwvjK+~BMnXSt|4>NnF%HB`tRkXr_NX za+EWrXW)`-@{2?LblF8%@{4n_p*4)%SzD?PRr`n%`y>swUk=0+8v#RpNm*V~YY&Iu8$jkyhr%h{! zC48}p8rb;~md+lS@(KDf7R>a|;`jT(Y-g@Nk2T=z;aTy6Z2pM)xvQv64H{UWJ@rgi znw?ALvvbJUg>2#Q+z~}j2d_ie^AD(71smjKwt%f}HC=i@eTLfPLvIh=({Ph($*7~? z+i1iBueP4fUbwJ+y>ai++QYjK)^0Vfuid?~e(%=$L;7(2n?BUdqN9_vOQzplL>m-4 zVFW2oJB)V2VfVO=zQ>bSoy9oi%T8)iOL^5v-~W($FEj{f#i)rOvXzB=M<>7wU zTe-3AHQP?X!U_D)%bA@T#Nh>P`e}u*uDOE z{8|Gu(^2QK=WlJpz!&I+v{ZMZD4n|5>jr)k{t(XHn-tJR`I0$lb)$&7lw&}DHL{XdiaPm!=EoBT>Ruo}v*s4)dkf+|k6VKz7 zX&+CbRl|nX=8_BTCum%D(4e6}xQfpDAIPn|b|q&G^qzLd-D?D1xRq?n)1ZI(;~Q5u zAE8^1H$U+^Z~frPl@B(rhc5bY;l8u!g`dWoN$ai6HUH^mjMniotZlnnUc8CMc|~=t zcki!I%Xz=n-G4c{y5*r1yYkw9k%FYHp`{m3w4}90gKD+WsI3NcY>KbdhBWEgz-6WR zvFqc6HLU7FZD-M6cAlb$P?8h||F{Kr6oHB-s03q7zjzyprEh;LEh2-{(+R>{OD_zmhN@83FUCXee-lL>qbv-$ z9Eaxk>-Qc}3Kr)=WkeDCfr-O>qH_F;!lG(d;d=uE6~6O*9JLO$3_a^1&O(A8as1-g zb;OIHE*bJHkYBRTUb4$MtD)44Pa;3eN6N!*8S9+IUyHPy1>I$AE@g;sKa}SHL5a?w zw7UD)C_=Tr$;Yk?5rXv2eFOv<-=Pc~S}}}i?5LA@B5}r5%M{%~sO9&m=Y@1!#kpJv zPYi9QI~NO2Oiv9jBXY35 zi5v91TT(_+(h)hY_@VIjavghSZmY_OX+zH~EZW6rYCTHsc5v{gqqZj$ZEn&3cvJfZp2>Dl9v=VnOq-N-An0;IIrNYCW&)1YPDi7soQQ4mc(@n zCE7^Pf52i9=Oe}P8`sSyH7x)X!H;u;E^6d=NQPM;L0AP7?wxpSd;IUS7v6 zzDm6tH@VGh!(=8mn4#O%X~yw?Uazt-K&*>ZX7dR)h8f`8!h4!cFwiKx2|iYitka-Z z{}!8L7I4n9c~({b7N`lYeE1}rWYc(?%;HtuVl#@@G)8~`&w_M=Rrw5NgjZFVWBk0% z!Iz|0OX*dPZ_ufLa-vXrniz3rgX5TPg4=?@2@BXWe2&imPI4x}4>Dn=jjuD@WCowq ztGdA!@Nd8Z(ue%OnF$|F7|^$roKAn!rn6)!C!^9EjofZDirY4f#~O{D9@3}yGSX;> 
zsM%=9C5p{Dy^eP}HKKUQ5xl6jVp@AGC}(g5TVJx}hXB<_>4NS$wE#t)kI;O>+fj>1 teY~6%w`pp1soQtyy0S$VkCM15e%~ZKw^j?~uaPP--Ax;m1{6yD{{f}uKwtm> literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/NexusIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7de4464dbaf16c7568d3869f5f2f8b467c032c05 GIT binary patch literal 5113 zcmbVQTW{RP73OVk)Wxprx^B`o1IMnkt*pomkRo>MMv0xmK(Y=b*-luc1jU(^C~?X4 zkkndyeO*vJ=;Ob+4|< zwYbr1dQFt)d4o3}n_lZL25)iqvB6#8oYy~nbm-%n-Ex~XLOj$oG(aad%RST~9VRrZ2>*>yim3l>Ph z#9?2Ax!^2MS&<2}h%(koc>&}7C{R62_ueg%FpttCV~e_fmwsIrk1+D&x2VgGFu*~O zcc8e@$L5P8}DA`s61hi&70WUF` zD9_x=7lv*yt2Gg(lCyMc7rtl9?swQ@qivSw-z(xg>c;{$YT>C9)Sszd9?A$#p{k~4 zfI_S4uM=9-4=x)w0qAAi**AFP#uH;`JT*TvKQ(vF>^wKujod=nJ~7p|b7JmU*-~En z*c_U2W2kHK>z*w)@;bN1)-~t0;)_3a$i7aa29=rA_OYw%fqwn?vhY#?Ts?-1rpR{&|hI|B=Nl0KhJdwo_UWFP$Xl ziE@UEER<27SfiW?60C8c7^Ks-WDR|nb-iwAX)UjK%{1n|YS~Q0fhxi9EE$@~KaKC- zd;9w4{S2YKxf}Jay}q*Yr_I$Q2-AcIuWpKDKikZ^*EZjcwl*`Y^VNP3?giT-+r);y zqN4Ney;l^r?%rGOAAR3?V_PKHO?m6jG+7Evr!mm`cXzZ;%WuRfFqPd}u8iBRq_~wo zBfsoeB;L;pFc2CD?&rZ_9Bhd=^IBE0oL|l0HO>5vl&LJ;2SHp2U2gIKi=uXB5cMTF zfz_ykwRkMNR<*lj0~`uM%H%J|L@$dxPmWw5N9OdzMsB}|s_Zfz#*Ay$EyuZNoiiBv$+_gric#zB@n zE3GXnpBCFl1FFx8DXJgFM1mYZY)}z6fq)|AZCa=;JaS`b5-lFh=7cT z5*$(xa;#4Dh=)9gMwBDicC;^&$sS-9Y8o_^Rk>YI_R6kTRp7ab)H1xRvN%HSR5^0Q zTv8D{G1{~EYLYN%gweG@Sv%WF3vATe60YPT!IZLf-iY%Wb z{Yisba^tKd14t+pC@AdPe5n*U)w!{L?wRQkmYB^cKnKC<6MZ&g4o{gHJ|GpG` zlE5T5(jxB{(8Xh{-WJ^e3QsG`SOoimZHb*=KS~REo!uq9scWHh!!#~>5X86GwUw2X zs*9^vZ?Sv(LQ3Rct$CG9Rn2bxbwd{7m3NR+#79*N4fU7aBb|z|5Dz(nIYl0@KAsnH zQ<_}WOE9j}j43g55C9*%^K$3T2lv;|}m-;jjA(!4+TE#|1`BxCor-hh6XM5}BmyNzkP4Q&T?F|q4J{{gz8 zgx$$Xv}Yv6i3&#VJT_<&iaxUHcD_^o+E=_Cb&q_g7$snQoMU{%bdjknT-sO&Ifs6{ zS>V7QFAM5(>7CVasmy&^4EY;awwv;IH1GmF{(wi>7;U7ayy?yNz@EJz_I1WpNw%zy zUMTxcB41v?E1mvyvTe>H(Dlc)EF@l}S|ncjoHZ}n^vmGIABTU1{VKN*70|6HU=szL z{H({wFC*cNY``77D)@|i%9(M>*&JxIP=^C 
zemylHwHjRR2$H499j7AziDgSLF1k@7IN(y+IE*TKYugjXDeL>B{Rp9SEnc6FI^whN&o4fJnQ?9iXf&F7Ty}Z&(qNNMrJnX09sdXSsG4|boCMM9k-S<_!44dlSBG!x|0 zynG!^y;gPjR!1H0LiLU_nM$tR0Dsd+!nrn96IWdC&5gwQnmP%o6Qnx3C`aUL)I|L! ncJGh*Yz{{wnfi>^Zd3l?c&U+WS^kiA-P0^a=e z@$(09f7m{P`y*U`$Ubf#wU2$KHlMJyHKl&S`e(e(rH;GlEnPQ*ey`_?(A@BaIp5pb z?YO;py6IXd6MonHuJ4#volf9*LFl;N3xT=v-bdG#R?N`%J3+%RKJxpfB^=Wk^qh9+ z*r+i3fn$bSZeVtOyWerlJFbO$$9&uM8|la!eXkw5z89Da7#p<))vh@^kZN4#ZkW3m z;I8F`)bU+kY@4<#Fj8mNyz7Qr-0!B<>tW=;+37o8+X?EBYi;t)gwwGGXtD1NA)(a? ze6uI~JFe~64Rdv>A%@!6a71A6R0N^r*_KFqNvFy6ScVP1)A8@R-X?_I4bB^edCFXd z#9MCY1U;+mm^>QVK#t^@RA6)IawBOU3e1Z|6GQELj-bUL$(CpVEPF9~OFRVeG4nmk zhU_#wG{+s^+jN4^FikWtJC3&*ZY`R^LLE)B<(V&?G22^~u&`PpXqcBwDPG|8PzYhU zn^ZLF2UFvP?uLsFcp_++v>r3PZ_Xfgva2A}tzC2V(%}7TCZ>*d&imK!z_K4Y#&_^D zj3v*!v3li7Yvp^*=Cv#4!diQ)>)N5|gjkt|`SO`FXY1o<&4qXSu4iLzsJnNqpxt*m zvXNn#wjZ9xgt@+LF3dOP>om(&CMZp(>*1Aq!l7+yVRt%~M+?U9V|5)Pn-;1~*VPq4 znDx>y%^P_8LD;kwcU@cK@{8mZ#C(>1so5Jb_^9E^X zZ#Ue+{J+#V_qqwa?O?-TTM5Ss16qI}S%GZ&()W=wddKRx*uuBavq2^Y&)KvBw48T^ z8zSE@uXk+pc?X-5&4caQcp;b*v?Yt9ZE z8(JZh>$u0CGXAVOZC}_AR6K!_gITL3yo?q40)@xX48|nHXj%15Na%o_$gj7Qf@0FJyaw09{km|CQgK!t=&^UMaiCi zUm4{F^O1U4`S`>rALY<4e>-qtbmo1fVHc(RQI7hd8e1J1gMS+QB%Fx~QSqJz`TuMkBah0FL2voFD(2|P zsAQKS?fcqsWv>!x_f>k)kXjw+){RE{c9 z#jYGxsGrd+KRp}GN=al*l-tf>%*KlHxw_Lxq^|Ni{0Hs~`@n0;$Vht5ZRgSUixv59 zvemC{Ds~AgUbCLp6yn!~mOh?~Aw>y2fe}1a% zcYBj4AkoOwO;3n*6{e=^!&SPT)K_w^_)l+3GnTmWMH5!T!psMbb3PO08(Z;j)Kk}T zy$1P$ldjk6ha_!~btuS!4WE9$4-d6v-NBv+;qfip`QF$?51*5^48X|jQd2k#^A@cW z+Q984AYll&;+=B3y>K_PT5{3SNgrNVb$YP$n0}9*drlB`b2nWNJ?`SRjv4fWz`cb# z8>>Z2m*JUMZ48U%bFTSZU@U*Bnh)|2{=v&gM2TKQM@6g}f{N?axB#ePcbr%Ugo%w0 zX!WlM;fr`~(+OMI#%)Q26YDUv@oWHx9=b!PMe@a(YsUqGhMtZ3P{gIy^7Yo`>o?Zo zJgY*i58Ymz@7%_$#Cc%>Ibq~n;B+?PayCRt@QvV;S^Biwgs@a4!?Z*l0@{UrGr?y-stVd z2ly?=ll3YF309COvjwf9mhjI|t19PmIW?ym2Pv=NUsW?uSJFyqwRi+InmVT%`U$<8DyrtExfGOEZDl8 z`%GyT?Yv#U-;!OlOZZ#nc&Qv$lPOKWf%PxYx>W!dD2MrwJ7#kNgxMDiEiLm=Xap!s z0*%B*Nj&n9vFx~AG-O)`#qRo0-D&JD4et`EJ2b8h4-iObG-2X<3?JCc1<#n~8gEE~ 
zHL?gHmG=Tr1Oh8IEARralMlH8Ag>OfqYqRD8zS+Q*>OFGLO`E*Ge zb-7JjhJIv=$T%^W`UDw3CIyg*0)Oo@x>N5fmjP@>TBrf~fDQm~dXb>p?vtS&0sb7P zTAlL+&gbmnnvzq(+_oW_szU#${f8hH;QmX?Z8GZgBO!ELYq)|rB%8o;A6G`eZ6X(` zKhk#a*jOLxTC6qB#Oh#NBuGWRG%j@9AS7@3)?ZLp)Ner=MCNIEN zAuxUmne@pA^wd>$IBcPB0Gj5>o5Ozc-Vil3h;{5daS*wBUVMuRaJ;QXUj)6fi7OYSCEOYaD;~$ z4`S&?TEzFZnwIPV8{7JQZG^>(^bKN6a>M_;G6*SNp3(~nuQ}w)tN^{Lxv;Xj^ueVY zS0-W6>V!QA`v4ixRYZ+0nTYe|Lh2-16dPqX!Ur8b^HZ2bP17Mw+X^iZ5y@{^%wWo5rLK#okrCuQ;-YYRgdOWSUSIJB z6&BC~gcnaxGxB6Iq1_jkTf99`{Zrfq6q+cdL+S}Njb)ZnTbMP>G@wFlaq;?T*nu3& z(27wcQxYejfEul$SD~_zMyuOWBQ0ozI^r8`FGsZ;Q7O2~C(5dag$R+2orAKjhp_t; zbwFWDk!~kS%K0KIZ2qR=Jq3j=LScU_6;?iDtDkBgDW3KrU!2+>h+h9i|Th)w8j z9kEFA4(PXsy^ncZQ3e~CY)#0<^SO5re5C;mF)}&!#&Sd@L|TxABA==8hKR(9M-fME zBSI2G%twVJN3=9dWXNJC$$y52(#Q@5bS8Wvnz^S!8xIWYE0*B!$T2-) zH)Om6j_ni!(oEIJ(G+n7bS)x*t9ce1k35?af}q%gf4dI~0R|KQZ1qoaAHZ|%K)J9e zIuC?Dhw-F%7D+r~J3$-nfrzhV)Q}w(G)kPi?YkaIc!n3KL=E;$@|Wcc#FY(t;w>=f za;kIc@Wo$0)#+pPISJN_iBF45#5J)qnq3S1p0qlUjm?^Pjie(F#?DK;PPq#tTJ=%@ zKu#|qqmRSM8ni&jje)fwA_sv0J4A7jo<2g!F-qQ|nYAt?oCQe zBygd2AWqUP&ke;1972$RB|}Iim}2YwbH)&xcizv@D)5jfT7h7EuAmk)g7?yk62Pwj z#8=gtR@G6jG2qwHB1geM6~G_RA0Qw87I43$7WmS%idIlHK>r%o6ZoG)Yt-rm1PsHY zk6E6&A!tX5Q!ZsPTex)#%_dj4EA3_#pk#Jb__E)J*VZgQl(T8 zRMWdha8ChE1}Ar41A-*#s62dz1vf2Nipc|yv7u;W z3Qxkbz$7&7Qz(3mu;p!qKy9Mj1WDE9x+WEy5CHP5xbGl8u}Hi=mr*vC>8mn)^%0Z0 zKRBE)ld0}LBqN#OfD{TcR!PPmD%*uA30aavv{-G|RW?FWzD%^k6QV8L93RYpSR+m( zJ((FFcbo?$F*d|gVv#vxTS=M^vF1i<2#0r@-S<6pN;ufM8d`nzIedC7=yJUj>z20* zj0LdmZN|pN?iD)&M}f539tnGKM7hEuNx;&>Jz|tya5Z1wpHoP7luvh1LgY zz$!9|KBpd2YlR9d#u0UR@+&N0z3@owrf%ceWXQJD>9kto?E(QlKt0DN;yQ9w5`gBE zLwG7_rA(YBEnHU}E63DZO{U%5s{ehJs>dmpav#~gx-9^)}+FN7JhQMfulRKO>yEucVBWuvl zyXYI{KcRs_uy2U{xGx%r0#tnX(WWqB;)w0hUp2s?@X-@Tj@*UxD|9%wh_3EZPz8|@ zUh;<|IApSRaT-Z66Q2v=s0mVmR>pZFfR?w5Ln2I1GELKOE}hUgZfBs6m463U@NZZ} zwx<8z7@&X5SeBoCH3n!Pz+f2&#`Dak3=-Ce*)HaDUId?`*=6Q^UI)igvdeq5C}D%Z z;DEciuWcfr9~oIsbbeg|FEf*tq)Zh(58GWySf@V*+jD5lIo#Q4bL zN>D-|mnGW-jZ6rlET$oll<_~$;|4L!Q)B*z&NdR}N6DBUqT@6~8X4&$F5y9n770T$ 
z*)vJ=FnclK2eP>AZ_~NXDZ^L(Cv%+!jP=9PDPEyruTt_ZC5%@e$LxsrsL2u~{=*u71i+thNak$fV3pjt>Gwl?N|Bwtc5$jH{r4@)^Wdej&j15+{`Ws# zsL!)=OAbC{%gH9^-<)S(#-F3kNAnH-M3Bz3=>xl|4_)T@Gne_v5%7g8f7OQ1D{blh zhItxvkwdSccK*#T)rfRz9DV7MVa;=AagJo!zp)5h{vE9bu>(qh&iRS7BQmdoFQZUK zw9cF);#P6nEN36*GELbmCZFT78aHQ?J9$#g`rX9GoABxzI1CB|ExI84Az3}v8H$9M znBxdvP+TSm-|@C!878D=#UP6imoCW91KuJ`8xVm3;u0Gi|0pGzQm=hn`-9ry+JV~X H+K2xQ<|prE literal 0 HcmV?d00001 diff --git a/code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc b/code/lib/Bio/AlignIO/__pycache__/StockholmIO.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e638dbf6457ace9386eef314e7c28778230bdf62 GIT binary patch literal 16479 zcmeHO-ESP%b)T92sW>bHC2{opbM89vadWT(2vI+Q^!s{5vJ0D~-t8_<4VTz)~uT zrCM4;ZE6)wrFg25YNjh`j;9-$X10>$c&3qS<|}!`vsSJ#(9|nB$Mc8}Rt8mNUaN&)FXtAKRD){!2vhpnLvP5d6}7;cO-M=PVMVvjyjKxJfEDUO;WCl%$g z(<+r4&Z@hxWO$u+yCr<1-V(;7=eKGbYpq6elG07nH>NtCZ5fVh_-nRt>stAyVVSp6AUHKoVu}%3LcuSPlO+~#D-xx0yQj50ScXLDvmjFB{8u zmge5MzI5ZJajtY$=DjsPXDlyQi9VIVRXTWG3@_v-F)nmTUZoP#D&IPL_S{#`zF9ga z2_d|Dw_LtEH(#EgyE|7d&y{hzi}c+x{^pRFp=`wG=V#^-mgnc^??!yxHu?xIFB>xl zuZSWGfy1S zvl-$GOwX5RkkiLkLhzUHH3O!IwV1CGSi=<#$kRH1e>cay97fQxeb>`EpE|odH-k>S ziw01ym&-HAK`zFFnuUwnIX_oM(<##5$YeB=MlxejYtYBIkc#+qjYxJB~xAB${=1WSG@#>%T@Lt0%dH&kubidRq#uVDZas4T# zBQu#43k;S#=MB+%h~~SNaqjGEkROds)AjiEXIZ$IjF>rnt5pvCGb}Msz!)3lcxdA$ zkVJ>DYzda%+>{Ia5U;S5k!_qj?HQ+?N#nFJB}7 z*=H2AdfDAJR&Cc7W&>g-ngYp`h5R7_EN4=J(U$Ga+q~${kaUV(^K{E$#D#@})j_n| zwwRG(tFK(V%z%u5wEEU2mWEs%I!h9ggs9;O0v-@rR1xHU2W!jp!qxf2!qo{n*{;`t zDj|f4=7gxNZ@GCur?w?UaY@$Kza&Z<_O>?_%BZKT51uBmUO0rv`;&*PtH}@ey)#(3 z`>$xoksC&%=b_e&lKY>=55veoEOJKT>6+;pt&ZRB0Q6Fi#NADO)HE@2mQ%0WBF3%~ zsYgn*Ux7Xpj&DyDrws|Rjhms4#vDo1S!-n-h#F%VBL1fBueGec0g!4!K9$1&omn?M z-<%A){ZfK9e{Ipj1fCgoT{@uu2%bGFNR4Dd#KE%xtc#a#lo#K*v9NgclCIxud44pA z;qbtGanJb13f9yLP!iN5>@0#m$cUGz(QbL3L(ehRrXB+pHd21skR--iI4n;wTCN$( z?;7X1mC3k$E_35^tBaT4nqTe%Jw|!1FU6c4EOF)PAtk;DGv^D?LdLH%+ddDHN8csL zi~253_4QqxXHeh8si*|{F3y7$`aos@%?{DzH-Z9yAT8WR_kKR 
zCzl1h9S_%R*C44$=E(ll2LQ=36CMhy1w0DPYOKIQ*?<{?!Vyk{7;a-h6XXVd=R8Np#-GCB8-L!kBC1l zYw$_`H(OBC4QIoKK?R`#`OzxLXB!^Dyo+-6W;X$Y1fez&&0Fn(b&rX~^RV9rh&xp~8A=Qj$ z%ounFEK!G}ui?`{U+av`xqqcoCM*q(X|zanA2j(ud0{Dex(}1{vyjtY#p*lEEc$== zPRkI|@oD~{FFSg6vK={1}h^nJ1{ z$P~62ic4c}Ccy$8uWEX($KbTj=9Olxft7>>Og>mPp1Fd4@aA<^s&XpB9U>#|pF4Zu z{83DQAcN{!qImYBRKA*gFGF zMGQJ-V^kQed=OQBvAB~aHH}6eup$qB-wKlTC%Eg2 z#comkhoP(J*{3yqYNIQU$lLgNUq@go6&0Iinx$4!mS&}_w3VR^F)MEk ze4MMKEuA*KKGrH3Yseb5My%0~Qlik?CW4W8Ldt5p39y^x z4Xt=SGV-%UDbSXe@hb<~otuGn>sFxMSO~N-@)j10x!`CljuRhTaLnt7x>>WUW(|8# zu;HAB<8KE;zA#;nGy(2*a18bxTJE$&ReHf?sbTaRKwovmaV?0jU=ZF(=Yd`I&`m)W zNK&-iK)Zb{&~Dx=YQea$+5V^6V$(6uvEODls!h||2(%?IyEF&Jmx`$%-x472c+KE= z6sy)6aB`BEWXEEVZnRn(H4S}FS2q03;qLtufv@z`N9vB&Q+L%prKeQ2RSHu*3=wu4 zr~I^~JyO4`_LOy*a{)Q2FekH{wbFa4iZ|9Np6O{53Z=Iudg>MBgW;W2Pub1&Qk1uO z{~qdE&x?QATK5MiwyWp0O6r}pm2-Po<}lsPzE}t z)rR>%xIVGwkNd;|NEJo;UzqJZ?3{b>WKWbhxAF%E- zRvOr^l?o25@le8zLv}+S4HyAtUrAb{eU5F~FQuC3610%JFW&6Hdx1KDDSUXw;ps8R zZ65L8MVs6iyV>^PgHPH7%`A|D*U;*Y9u(5uK?o&wJe+oPPRtp!@8Ib{hBr{VDM+Tp z;!u#n&O>D!t(Lls{ESs(t@>-hco?a+>eaa7pip%zuSz52+iswL!)$cyt3tFykcEX| zHS8e6!xs#3Av=Ua!Pt|_o3jX?$rujvIN?Yh~luGX-q7;QuuBFZRcgEaOu>cK!f z6@pBy)p7kGE5|ZWHv;v+Un%0N=-N-;ox5-!^GbO4*PZ6MH?g;RzwDaq7k%}1IMolqb+D4jt`Z8t|Ueox!c ztZZ2JhgL3*<@uc~mN>neJE71*huF|iJH*VOVrjpt;b{t7{LmWcQQcXqzkCYiQ4cuI zgO9%lr#gwDP(%sNrW%)T)X1!dge;T8|!D>(B0COd*JmbZ2>1LWG4I z&C3z&Am0R8PwYsUx^;Wu8|6D!i>2;qln4cFlO!1&9^}xVqMnH@MJY`dv1X;JZn>YT zMxa)M3|ue54^p;kb+faly_wTqH-Fj-V=rFmv|(b9!$U4@1HKdBn!00)3y@^ptQ2V7 zVMcjK0Np{rC29EK_NQ;}Pxrk~PvpN|92c)((TSH41S!Zp@nwo>juoU`vF%2#`>Kbb>&4gF$St)`ZihUBw`0t!x2Ocw(ALghJ1YWE|@B zMKJcHohM2tUd)SGB7Po0B@gbZ%v09u)a%YxB~65b6!vI>%mb3lL7L7Fu{4yy>-2UJ z!9U~YeH($IjjH)n76M9#$jYV)+PJDghz)BKxN|I}s|EZ`3>P58H1#Be+^JL^@riUn zHB#evHjETKJC(^I{YqcD`}{$Hw@)!F76*cSm77tm270wB&oLohs8%;SW`i1tRIXaJ zTD5AGyFtu=Z*h%+Hz=S6^)-@&zu=aV{QL|2yyp<))pUkh6#eB>x>nHguLvqt!!N#K 
zc>FD7eO`DBXgp>eWoSHZ9m9RVny`-Ju3OJpCvYFMj$p~1435Mdv=9P$^IvhI?FPmt00`9J?0|sZ1q9*bj4PA2C)6$+4Uh$v?PJ4>TrwmQ;--d;Ve}+- zEU8V*FF1NYOPvmSVLK;AJ&CUYj#G1nX0|aUk1Mb!A&^GGfQb;JNZc+m{A;(`od#Mc zYf5Aqq)CH1xkl$U@P6D3+Oka+de|4SCt89&ZV-<|KgVc(ts`i^u(8dY006>iY=!-^ zAl)O8EF_;YY?B8NZWcEdxE8PCyhUi=Q&ag6;Iy%D$GEp}tdgyF8ICJTkCD@a*luBNx%Ie}*@WzS*!$>r^uzc#x z#$6Vc_xdPAN68XU8y1qxFzIV(H72>|z@Bz%;c-yF2mnY+ z4qeiB#+3<4hNKN7jzGGKF$#`SFiycS3MMExj-Z$mI>iSmC{Qp&!7v3Q6pT`UGq#HO z5`v;8NYIND6ud}*LBUH1iYal5;?Gm?0)oXFnO}6VZoqvGKQEn;KzoO*YZdl(UYvv- zuI&NPx&_JuXvVXu+S9y}p91{WcGG)+PXNQ0{fxB39l&Kj`$*YIBbLE0i(d}Eyp`S= zc&u&hMl?Z1+)^YBg3O?!i9aN`R#k5)n-+31$oamN-5KCi4yhk-ED!5?U?T^#(Hv;C=y+c+@(wr@f)1l#AdEbYY5EV;dPkd?H3>fpv6) z;OOroXUtM5hx!K`sl*X#9NHP~4fS$d<5q8Y6%e>LbWw4K>5kHc-Y~&oZev7qoN4~} z5Sqh%H1kaJA7IMli6Vu}`7m)1aoxx1F;LaSA8$`Sg>DMH18(mr?(1pA&ur)@eO<-r zZuH92T8B5R+|KYf6<2#tLCO#2^EV>YZ2_prvFpl>+E{S(HX?4x@M#ula?!ZeIc^>NBON%O0h-W_6( z)G%g+QXL_qr#cFXO7%!D{WvXt-b-<)tb@mmfyXJ~W6g|(bW)VY3_LE=^n?sKG7J3y z|0rhNIC-Be%xuitkyT}9bnDN1X&71G+Zm*JctXK!9l_fLGJ!$uZ$RxiX;$tY>kaOy z=!YXaV>?Iv3I8~iLVp~=Og_;&;;7ciKg#TkBR#$j1cvz$@%Z!J80UQnc}JP=v>5Fj z<-AkKQ+7{)&N#1zQOwVAnxBtTn|}&p?LsfVdy);o66W@?oe6L^fqF;4Us}8l{+{n0 zi}J|SBW@q3H(y5|f2lW$b{^mQ9siW1!k)Lbq|-Zw{&^lIUt0X2H_tryuOSJn-F&X{_!ld&-|^o9Sf<_2KM> z1qq`HE?TX0Hc+?vEC2%kpXwJI&{wrqvwi8$AnQG%omIM}7=uP|y9E0K4Xjp67vbT< zHp?YMqF2S6cukOx_7ft|eRYl@0X5Bog$kD)fCQljoTZH|RMv?R2DzXIYUxvT<`bwE zrJG^b9N*>)zAek!ZKM0uYYcEsJ0WaK4(i2yk1b3-$kYnI0>NS8?$-clLld0+h%u@P zjc8D-^s z3W9yyXBc-^veaJ35^vF)xjKd$=e2{hkI7vbji*rL;Dz&o%pOmUd_3OlToA8dgG3B)*m0(qJ`sg6<_&)}p`N8zp*O}!AN3hJr! zM4Ei9#77|oFD&S4pfHXa%=+zvDbDCRN2V^2S`C3Qw{ZGYCKJ? 
z>YjYM=!o66Bq_*$z-Y3L2+*PmCn#KoyZl`m;}s3=)YLA%OxwekYS8NUw+*DR`34Op z_gyqUMYo8Tg$hS*nw}Oj-7B}*FgK>iWrEYd0>BR|UhK8zkGuX*?7L;5*2zIDBq1;luJC%^sWEd$US@B``b z9rA1=-W$L#*Bb!jy?jr(r)-|bP^cJ+b~F^5=ifsv)MyR^p_>1MX5BG2Z0=+jOj3@p zPLRG6d2E|^l3*+)-VA&KSX(1E2GmM2B+`px;36|D{?Kj}dzP*-`R<#y7TL zV>3M_CEbsqg-o;;q(L0=3Woyr$mHQ0SyUdSm)lq5SZ4MZQIzMVjTO{HW_wJZ{TNLg zeB)0Zlfp~O+^I`*Ngqa^5E2)39l#|qNq$D>Vh`&-gPh6o!M+!8W^!Np@OQc1C|G=_ zeDJMELz20%1eRUu5)|_#fi#ujAW!R?^ebcpnTENr9 zyNbyt1mvZ2TkLM45vmg!Nm>l+nig%4f5+ZtJzdnL=GsPXWk9My9A-G6PbwqHM5>b7 zu4J)E|Iikd;r$OPgQ3QvQU}CEFkGcg%2?G-ljI=7T^Wb_8hV6uiw?OxjQzG@#QM%1 zBrtCuiCB1$#3t>|!UG=whr$DBTg4VxYU?k1DTsqzYzw7%|NXkJ!4|>ZHY^p$Ztper z0&os(wDq)g$YcnMag-<9eZ|V|X5<^#?$N4QenNKq1DT7w9Oa=#+AjOp%J-MY79R4c zKB9T>8MfcjgyT2IXPJAOz2j!o_UZb6R4LkVA32FW)$;CFE zr1m3h2n|A*6}sOkTUNZhU=3jF9S7=aw-5R1rSD^uL&ewrxroh@h7)>Z4|-9PNmtRz zb5bE;U=#+d746E^K-F=(-PNXzZfbt{0)pEY7K;V(+f<6wE~7{BZHm2(AV{yb99QDo ztCV?-f>jD=V?|s?5b85Q^SN(}g`u55UZoJ>$-{P4Hc(=brX{IHC4)(bNjZ+cJX9KO z2I!d3@*GgS+G7x`xlk+E{(5sA>O3H{oXtg<{8LeJMi+51-3TplcwXWjryZ8!xe~em_=7n#v)%VPc<||)o)%VRGnXj3z zf1RjaGq0L&n3quciTSSirui01*G*x*ZC*xc%F3DV>?Qvy;Z1`QSDH6kKdt^0v-|0D z4fB0B=W?Uvm%JQ29uYQuV3 zd@wBx$3#ON%>ogi!(iVPhG&Vo>o^$4qPOVQF`CW8ybyQYU`MoEv(vPE(eY6qZG59e ziwK0<5DmL&=?&Lw8G&%{+!s?#ci-|v-9T+W@N8$>&!MMf&uwvQ^x|FjMceZHHb%6c zS<~Xs?Vy1pJj*b3+YyFXTX|e6u5tG?x4^I+^dwxz60V0CVagHpXlC@Xk2up*3%%-L zuUM9xyP~}>_U&LtKEy=N?2AbT!uRz{q&AwqD@?cEp`8Q4u=1TCXy2Tj-Ll>GVX)&m zdDq*X-M9Db*>JOn?puetA@+A%ut0ssXoJi&E4PC^>IAN58_k=#enb3fWplM8Hr1nF z)#%aM;&T5JJ)ak~6ZLMMJ&VQS#^x+$>(7RR%!Xa(cY;<^b~Vud59q0FHgvs6Osvn) zM_&g0nZFL(cHL+S+G{LOvc>+6<&B^)1wWVj@Yo# zalk&V)4%U|;-D$wx6K`FRbcFaBf(fKUy@nXP*WHc)c349$sZ)fM^_lQLEXt+1RD~2 z6I-)PDt69I3(4@hWN(%!w@p*Q0-a6^a!CDzV$QrCF935Z2H?JC6N3jGkGP%K7jhJ8 z2rwS=4XXyJ00hm~Gi>5jM_31Siw6{TgUAeNuzlxxAa*-G#L}_!h+aAH)SWwbM1$lH zOA~gh?RtSwjLGk~vbY^SlSG_c+c|97?OE0bO{f_&-`+W#oEDS#gBO!IZrg^02UFZb zPUlW})+AlxL*cpmM1NC!`16mjyUk9^@g;|>-mer(rR7SgP+8urELS!*m~rCE`n}@X 
z+EPBBFV4(}g~uE7wZ-DXVo_f$tQ6KOrBbPYrlrDWp-{oU4SbdhiyP&&;`&-;b8)>; z*w`$gb)`@ch0@Z)+RB6V%0oH!%<7}UW@)WdS}5ErJSvs$Rl;G{Dhu}tg{9I)rMy9{ z*Gd(!P}p3ltvp;Ul=XBaA1E2!&0N+`?* zrbFsq7^Kie({&QZ}2 zP1_G%mKB*i;74jR#XMDfkeBqBTxxB(zCY_&78FK4I0%R%Eq*weJH?XR)1Q9u%TuiR zLCl)-AI!z9dB0dHRu&h(czmz84*o1JRhG;5m+n3OVsrV6LZN)G^hKexSgdT`Us}i3 zfNdbejT*FwURo(uDrma6c5ic`^0>IPQCS8j-CNmQxWBPnSzlWMV=X-{mNr+`&~)Yg z685M2=|`V_{K?PeKJJntf-UrPebt2wM^u$6QYsvujpP>V$vRBEgRd!IjerawbuE#1MC9r1bW(AAf^xxplR!Fs|BTM9T+Wu6`iywI9ovzs5QC^ zTo)?cdb;JhgkAXA+Dhp$EVkdV1L|tSZCQa$fB+3Viw!YmE>)`7ftrSvB+bx3AC?DX z;-g2%k?L+~_3nd>jXpZa8NeJrV3edJS%0WN$(b^=?_n}5LnNfJ5E-;etS$&~OrPdb zfJ;nSP+>Gj5Q#aFF$LKYC0?)txKo57L!rc9CsiF8ju+UN9Mb?~t-~CKOQF8xj^BjH z2z<`d-t;bF6(1r4&f*5>4qJa)NDL$C5LYCTk^7r(7zRmD!|($GDqV-XOEhJK+N=d5 zifx6B0gu(2E@o*1Ji4OM?D$Yi;0MzP40^(4+u!@a(`K{bwk>Ds}gbVpH# zsL;m`5QY^%V#qcFhXH}sn>L86z%I-P<`Ri(yj_gYeQ0!&b_TEkuoN`5oRfpCAQ*m1 zsKB8-t*A>{Tco&|DdI)?J1$wReb?Iqq4kI)Y}_PENMe#NqO1bZF65Sw<9SSI#kR%T z>3fDG4Bl+&E+`}CLWHvpp!CUVF$FDy3?I}8#$0e61=E!5lhHou73BdTQ7e#!GulC{ zTOtu*8U7)33}l#HC$e<#lt|M>^zPpwFjzRMpBDwwv?b1K9!`@KvKiL?=aLZK_R3H>8ztt}fmnp*aGCCWB3 zR*05N?5dxv7%RkhxD`%_B`l1OmxvUtLd!Qwhat9R!{MA`w{O?#LDyfTqgGpm)LAMm?vwIgpAtExXP*&5n zQ*U-mwQ(-76vku?Jp<7P^adiFfl`tdN-FII_52Zeq+d>xjgv#rRHbLlTdC^pvu2aF zihNhF3C{(J$?Hf%O9q+@XOlUBEOBTZl=F!$W`u-9q#|d8vZZ9T{p-N*hVA*2yrN0T z`?!fh5t1MvJGEr9K%JmWBx61RYfCKzt;qZ$n-B~pT}mPgG~;?uRsr@?y4t)hvYsZm z!*1AM0A2uBp{GhGVh9;;Z zoM3Smbg3Ob&KW=@58pv(iT z7B-enZJ8XVQNvzUcRpdzw>>kb7ukS?(j7A^wM!j4MR2WUZLP|PR&Oi3}Kmd9SGvX%@O zd4sT-L?dNHdHdx2W&4@fVuB>un@&&$3TF#Owx7e`G{Sm8!C;EIGQMylP+cnD z7PRhT6_g7VB5z$EmjV;EDU`s%$>W2#6l3%|&;a081u9A7Pukm)Kr0NvNy(CDhju(K zpp>=s2bJRD(jreblx${-I7%dSGp|Qyc|c@lJJ`wVzo#H|F424aEI${|M-0Cvk<0H9 zyr!BXO4$}4;3g|QN-hsC6Ab3!(5lgp&zRuqnXjHPkMpza8b?F8Jh~C`0d7Zo!^$R9 zGg28Jjj%c;HPuVCj7D#WH@upH%=U&@+Iktuv`4R;V&0<(^++*kZzSTnT-KYw%DfA7 zxk#5+=<+HqFVT_r8tTqy?+CsN`1)5V+E0;wt(7=VCA1*fO$MpwN&jk)HdD_N zUnROp|3)xmre(RC{5JEQwwLnm;@MC)c}b(^Xr%?i&y(JISKCe;YeBY~Ff-4Szy0Po 
zjs8aVGTxsBBi-cN+VN0#sEfY7)w|kviQ|m(tefnncB#g{n7BV^bhSn8mvhI%=yRC* z^#1`f8SW0tk-DjGN4vva^!goU+s&BSPqgFA)4zVCInx+vY%k;YxJQ({f5#j#s%+_u z;Z6_l9%<(27uve^C9OCUj6X+o-@hD8m}6+2X{a{c3}XA^#(#oHkI2y^+tBY+#yXcV zw{(3WSftSkVf041GBf>^Nr{kzaL_MuH%TGAS!mH!i~Tip;}^mdL9lZy@ zLdPc_Op{G6Ufe_#e%$1IQ4~)GqO;II(v@0@ANk*A640WQ+V9+Jj`c? z6@AA6&omjgUjs-a5rQ)&-OwEa21~{?K@qlLA&Qee|1=MK&C6ZrrC|HK*D>K9qE)s7 z4Gw?3mjbDJBb$e9Yl+6}=@p~dQRN{fn3qHwFGUS6)Fgd3U6y3j=IPXE(|SW(?!6M# zDPp1&VC|*A2)(g&gj(RD@-Vq4R{*zt1HUH_{+$T>8jsgG7G(LP{InVqjL1X?&$=-V6gL*S< zqrPWsTmBPJBs_YWt<`XL7Sw9__Mvwbo&FQP{vBMjiLsQP$R^VGjwQyDdNPw7OJ;F* zF`=i@2_65E_?JyiBqkECqBNGelpNz{V=3;d9NCRw^f!cW9bf!xV6K~nhMzjlbcenj>Sn%6!0Mp<9F}G;>D>y3 zsa(OyD%E~^>yg$S^8OaRvITOk!nQ;`K2MN6!P}^fIRuTJG5!$(3#!JLg|tZ>s2z$7 z(tW0qjYMc_ZM|42Y%W3g&(Y*c?ewcz^dIVgGgaRU^YYUXHmVaKZPk#DLf z6dsXCU12$HXL~1dUH#9>xe@OYIF-Bf-lfuITzW%%)X>Yo2?IrXDOf&M6}>d+)7-H4 z9#wsxs*Xg1aVOQ+=w3R%wUA?a!}0;Nfh@3S%S(;FIvU*MvR zK|&#=X~?LQ%`wO)Nw1#1h^se`Is@WJFpkCXpHb)+M}V5d2^g2?gK>#&VgSZP;%a90 zM!Y9zon+|sgrFA#ylfXh@3-rKa|woXUjfb~LO9n=K2HM5{e+bOLAh@N9S{uyZD#*6 zg*ODfP+uv9%#vniRv_R2VHpX+OafZE@i~l=RK36(fEpyL5y7N=g?ToH+>v}NfpSzMu&;v#KHr8oR!nih@;XUA&<5vxeXm85_*zO zr?_TMifeJ#`E4uclU-S93dfPu%~|c|KlteUck`c~|L%ji^WV+?{QP$x{4CPn(U!$K zMCUixq1dB5n`k%b8J3QC0b(uTvCj9ON2#}Qs=)@!N$~;CPKefFq}*^oHMk>T)hU`$ zlxTrtDIz49Y?@eKs6*v@#KCMnt)LZ3s}~y;K;6s zDHJ(f>_i?(E|>7~Q0!;g!rW+|WqT8~ytnA`HeJZZdGF9g&?Q9|HhyG4`mGyxFOB0h zysEw$TC`ep9MDVHC#;!w163~rVEL0MXzva{On}HNU?xFKf}0m%%D8@Hwg^E|1Y!YS z|0`SqjnE-XNa{FsI2Hg$0@bDA&tFZ$H{a8}j}tHzz=$a`@f?wguab{7C;6pz_`0mi zZ#YtcHm|ETzmjca&2AFWhcrbUfG|@iBbpJ$0lH}zo+0D&jHYdJrp_sYgFSlzdWjrw zDV(SL{_jVN|8IJ5vV1lUB~_q^r!@U$C>A7WBfo%7oG&oBJ>+rwv#uS&Lj^sXzWyU zF@9x)-eaFy<$SnPb}s7ef6&BzG(R7i9CeTxXA(*K;teLcP==by4~a?2Q*DJQEYT7v zR?`>9!LcS)6vmL5XSo<;*nK&1Q2!)jQQ8J~CwBv9dSTMD z+ztsmjxLv}4IPl8#^8pPkKTWoeLcF%wPIutbrbU-vxdq=U^hEva zrZ*0~)|So{&f+CTn1=oJevC_QD0X=n6!fwo6!5N7C5lz`Ikah_$R=R`ffw2|g^b1! 
zLb`~*6l$7CWZNU+53;_@2}rwOo5xV) z^9MnNa7wD~%MH>(T6-No6R07dl+G}tgy`^v;1`6J+HK07QC{u|O%G)^xmYexO4>U9 zkOiMqc5qVZ@*6kG4>p%>+`u3z4G9$}4~1yhH)nFx`IwV)t&vDUd1a6Uc?!v?N2FWh z^dmZ&9!x)?vw!r6WEPKpevp@0<&^Y;{C)IB>DNf?;=Bx|Hddp~K=_veTk?2x%F3fH z@9;A)1=8Ay$Z{(=oB&IPTwTivV~UhK87+nD0h`F<0oG1~G{w?lIhRQooOw&DlSjOi zMq(n{(Q9$YO`VvtUi0#UU`MWiGqA8%oE9MumJ}>uP!;|P}T z_!Iey%6FWKtwTZi@M@-0VWRquiv&vlM3VZnf{5TJK42xrBL0%y&GV+kCP%4hc=On& zjum?F@A2}u?v{m|3C?%?6TUYQ_;+MB6rAUS+sF`cD1GJjM{}}*+Gg@+NdRt}4w9v8 zBv3iqw?7lxM*H@RtcW8Z&pzNzS1sS&vDVR9WAz3GyEmgx;{+DB%-*OedQ$p4Z7D{7M8;nE$=|X>G2ZL80 z^ZIHY=By5|)44v_QZ_ulWf?gSwxn!4CXM{UG0RLH2#p=hmRiy01+!g%(5u}%G3JYA zyVkVh{p5joylBYt*D09_@oQPQb+=R~-!B!*OLv%nM^mdxVN1OnpCuYfGlvIk)fD8V znu0d24l7N3bmbw0Nu|sV=%Q(y9hU2`{QRNwHL~TDVBlZD<=_&Z-%lLFhIbRY2uk15 zo@Q`{qruiDkFGp`@|^KVl}EkW&P!)w%q1lfnZqaujC_JW zeSl(hZjSNo+}f;@_P znlxzvi82qFOZJlaxn81%d3=i6@jU)8a*HA-$e_IQuQU%ypEKqVRRc&nD|$mQ7CwK= zWqQL3LsUm1e9+6XfdM$E>anSjM4_mRnx=ffQ{;1qVMU9-p}A3%NSmgEh6#?>WRuC{ z*bt5$Qe&A{6W(Wd+soE!E&MRAhL!e4k)Bw?@60Hh6_kkoPxppv^rBX)jzp0=K2ND; z>3$j0l!vgIMm9dO=wviCSH+I-xRo)ah*X zB4_ig&d)_PMk%Q$d_67`d&1k%RV4R>H{)eWVvBAkRFY5l`YjyTs5G(@Z{Ap!DP$)e zzY>KF!ah0VRvlMskf+Yo5zJ2&F;!%Hg*QAMbu%XWR(F?9PDMR`ZD1nm;e}`#@_|P} z$&AFON9+(V9FO*w;0bn)e*;{9I!G$=;Irtatt#HT)7T=?OYl#&_^v cCJp$1@}Hg_e4?kbcl2xeP5m?db2|6>FZ3C*KL7v# literal 0 HcmV?d00001 diff --git a/code/lib/Bio/Alphabet/__init__.py b/code/lib/Bio/Alphabet/__init__.py new file mode 100644 index 0000000..5109136 --- /dev/null +++ b/code/lib/Bio/Alphabet/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2000-2002 by Andrew Dalke. +# Revisions copyright 2007-2010 by Peter Cock. +# All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Alphabets were previously used to declare sequence type and letters (OBSOLETE). 
+ +The design of Bio.Aphabet included a number of historic design choices +which, with the benefit of hindsight, were regretable. Bio.Alphabet was +therefore removed from Biopython in release 1.78. Instead, the molecule type is +included as an annotation on SeqRecords where appropriate. + +Please see https://biopython.org/wiki/Alphabet for examples showing how to +transition from Bio.Alphabet to molecule type annotations. +""" + +raise ImportError( + "Bio.Alphabet has been removed from Biopython. In many cases, the alphabet can simply be ignored and removed from scripts. In a few cases, you may need to specify the ``molecule_type`` as an annotation on a SeqRecord for your script to work correctly. Please see https://biopython.org/wiki/Alphabet for more information." +) diff --git a/code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc b/code/lib/Bio/Alphabet/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ee4b0189d363b7d97cf81b05a0e36a5e01f9174 GIT binary patch literal 1028 zcmbVL!EV$r5Y28YfdZ8fUob~jD%n;@6ev0-#g|Fep z_w>piaOA`!Xb&>rwQx`x(cSKlb>cN4%)$Q-l*W z`8N4FQPb0CwkCc}qG-Q#g;-)BXwV}#kC)okRJKqPDhL)7%2Mza;YE!`B81i<2&16H z5YQ7mIXO5zIX*l)oF#F5Rv?FvUKy}C9B7;D4-BAk(RT+5O~HCC#~4{y zEs^4;(6ZQsMu!3dLoU%^uEP*w6yq0xx4WD@t~>^@EHN4Qu1kVOP|UF&bK4)Q>?O13 z-d5c?*M`Cx&WZs_6ojyy?7T?e$WTC`HhbF2mMH7;x@b-DZ4ty8@H94vpc#ngDPDX) zX}w}KJ6;gZdF#9u0a=>&0}UZU5rU)L`FuI%kyyW)H~L)9M@N9O2K*u_S0Vuww$Ww< z{9;q$;DsUG^N#f-H-T*0-MnwJq$Cz^*!JF+QKv!>>x-Qb|B|Sa1S(-#kgO67!iX_g zvI?X*$(AvIZD+07Vs4K+Xe7OMfqE(7Br@hC&tCh#9FR9*jQ!-iD7*U7ut^y*%gDJ!$=>cLUv ztPh9YTmL=sYkb-I<@d?k-Si{z4M{)iYWwBZ)=s)_gtSJ9XDOOXN<+S#9_wYwR^n49 p>> err = ApplicationError(-11, "helloworld", "", "Some error text") + >>> err.returncode, err.cmd, err.stdout, err.stderr + (-11, 'helloworld', '', 'Some error text') + >>> print(err) + Non-zero return code -11 from 'helloworld', message 'Some error text' + + """ + + def __init__(self, returncode, cmd, stdout="", stderr=""): + 
class AbstractCommandline:
    r"""Generic interface for constructing command line strings (OBSOLETE).

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers. You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, you can turn the object into
    a string (e.g. to log the command):

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool. For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print(water_cmd)
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control. For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()

    Note that by default we assume the underlying tool is installed on the
    system $PATH environment variable. This is normal under Linux/Unix, but
    may need to be done manually under Windows. Alternatively, you can specify
    the full path to the binary as the first argument (cmd):

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(r"C:\Program Files\EMBOSS\water.exe",
    ...                              gapopen=10, gapextend=0.5,
    ...                              asequence="asis:ACCCGGGCGCGGT",
    ...                              bsequence="asis:ACCCGAGCGCGGT",
    ...                              outfile="temp_water.txt")
    >>> print(water_cmd)
    "C:\Program Files\EMBOSS\water.exe" -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5

    Notice that since the path name includes a space it has automatically
    been quoted.

    """

    # TODO - Replace the above example since EMBOSS doesn't work properly
    # if installed into a folder with a space like "C:\Program Files\EMBOSS"
    #
    # Note the call example above is not a doctest as we can't handle EMBOSS
    # (or any other tool) being missing in the unit tests.

    parameters = None  # will be a list defined in subclasses

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        ``cmd`` is stored as ``program_name``. Every entry of
        ``self.parameters`` with names is validated and exposed as a property
        on the class (keyed by its last name), then each keyword argument is
        applied through ``set_parameter``.

        Raises AttributeError if the subclass never defined
        ``self.parameters``, TypeError for a nameless parameter that is not a
        ``_StaticArgument``, and ValueError for duplicate aliases or a final
        name that cannot be used as a property/keyword name.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should have an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments. It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        # The class attribute defaults to None, so a plain lookup never raises
        # AttributeError; test for the "never set" sentinel explicitly instead
        # of failing later with an unrelated TypeError while iterating None.
        parameters = getattr(self, "parameters", None)
        if parameters is None:
            raise AttributeError("Subclass should have defined self.parameters")

        # Factories (rather than bare lambdas in the loop) so that each
        # property closes over its own name - beware of
        # binding-versus-assignment confusion issues.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            if not p.names:
                if not isinstance(p, _StaticArgument):
                    raise TypeError("Expected %r to be of type _StaticArgument" % p)
                continue
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined" % name)
                aliases.add(name)
            # The last alias is the human readable one used as property name.
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError(
                    "Final parameter name %r cannot be used as "
                    "an argument or property name in python" % name
                )
            if name in _reserved_names:
                raise ValueError(
                    "Final parameter name %r cannot be used as "
                    "an argument or property name because it is "
                    "a reserved word in python" % name
                )
            if name in _local_reserved_names:
                raise ValueError(
                    "Final parameter name %r cannot be used as "
                    "an argument or property name due to the "
                    "way the AbstractCommandline class works" % name
                )

            doc = p.description
            if isinstance(p, _Switch):
                doc += (
                    "\n\nThis property controls the addition of the %s "
                    "switch, treat this property as a boolean." % p.names[0]
                )
            else:
                doc += (
                    "\n\nThis controls the addition of the %s parameter "
                    "and its associated value. Set this property to the "
                    "argument value required." % p.names[0]
                )
            prop = property(getter(name), setter(name), deleter(name), doc)
            # Properties live on the class, not the instance.
            setattr(self.__class__, name, prop)  # magic!
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            # Check for missing required parameters:
            if p.is_required and not (p.is_set):
                raise ValueError("Parameter %s is not set." % p.names[-1])
            # Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        pieces = ["%s " % _escape_filename(self.program_name)]
        # Each parameter renders itself with a trailing space.
        pieces.extend(str(p) for p in self.parameters if p.is_set)
        return "".join(pieces).strip()  # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print(cline)
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        pieces = ["%s(cmd=%r" % (self.__class__.__name__, self.program_name)]
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    pieces.append(", %s=True" % parameter.names[-1])
                else:
                    pieces.append(", %s=%r" % (parameter.names[-1], parameter.value))
        pieces.append(")")
        return "".join(pieces)

    def _get_parameter(self, name):
        """Get a commandline option value (PRIVATE).

        Returns ``is_set`` for a switch and ``value`` for anything else;
        raises ValueError if no parameter carries this name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value (PRIVATE).

        Raises ValueError if no parameter carries this name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Switches deliberately have no ``value`` attribute (their
                # __str__ asserts its absence), so never create one here;
                # otherwise clearing and re-enabling a switch breaks str().
                if not isinstance(parameter, _Switch):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value=None):
        """Set a commandline option for a program (OBSOLETE).

        Every parameter is available via a property and as a named
        keyword when creating the instance. Using either of these is
        preferred to this legacy set_parameter method which is now
        OBSOLETE, and likely to be DEPRECATED and later REMOVED in
        future releases.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings

                        warnings.warn(
                            "For a switch type argument like %s, "
                            "we expect a boolean. None is treated "
                            "as FALSE!" % parameter.names[-1]
                        )
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name, parameter.checker_function)
                        parameter.value = value
                    # Passing None still marks the option as set, so it is
                    # emitted without a value.
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid (PRIVATE).

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error. Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            if is_good not in [0, 1, True, False]:
                raise ValueError(
                    "Result of check_function: %r is of an unexpected value" % is_good
                )
            if not is_good:
                raise ValueError(
                    "Invalid parameter value %r for parameter %s" % (value, name)
                )

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print(cline)
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these. Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ["parameters", "program_name"]:  # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter

    def __call__(self, stdin=None, stdout=True, stderr=True, cwd=None, env=None):
        """Execute command, wait for it to finish, return (stdout, stderr).

        Runs the command line tool and waits for it to finish. If it returns
        a non-zero error level, an exception is raised. Otherwise two strings
        are returned containing stdout and stderr.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr argument may be filenames (string),
        but otherwise are treated as a booleans, and control if the output
        should be captured as strings (True, default), or ignored by sending
        it to /dev/null to avoid wasting memory (False). If sent to a file
        or ignored, then empty string(s) are returned.

        NOTE(review): for a stream that is not piped, the value handed back
        is whatever ``Popen.communicate`` returns for it; the subprocess
        documentation describes that as None rather than "" - confirm which
        of the two callers rely on.

        The optional cwd argument is a string giving the working directory
        to run the command from. See Python's subprocess module documentation
        for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process. By default the current
        process' environment variables are used. See Python's subprocess
        module documentation for more details.

        Default example usage::

            from Bio.Emboss.Applications import WaterCommandline
            water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                         stdout=True, auto=True,
                                         asequence="a.fasta", bsequence="b.fasta")
            print("About to run: %s" % water_cmd)
            std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output(). In general
        if you require more control over running the command, use subprocess
        directly.

        When the program called returns a non-zero error level, a custom
        ApplicationError exception is raised. This includes any stdout and
        stderr strings captured as attributes of the exception object, since
        they may be useful for diagnosing what went wrong.
        """
        # Every handle opened here is recorded so that it is closed even when
        # building the command line, starting the process or communicating
        # with it raises. Closing matters particularly on Jython and PyPy
        # (where garbage collection is less predictable) and on Windows
        # (where files with an open handle cannot be deleted).
        opened_handles = []
        try:
            if not stdout:
                stdout_arg = open(os.devnull, "w")
                opened_handles.append(stdout_arg)
            elif isinstance(stdout, str):
                stdout_arg = open(stdout, "w")
                opened_handles.append(stdout_arg)
            else:
                stdout_arg = subprocess.PIPE

            if not stderr:
                stderr_arg = open(os.devnull, "w")
                opened_handles.append(stderr_arg)
            elif isinstance(stderr, str):
                if stdout == stderr:
                    stderr_arg = stdout_arg  # Write both to the same file
                else:
                    stderr_arg = open(stderr, "w")
                    opened_handles.append(stderr_arg)
            else:
                stderr_arg = subprocess.PIPE

            # We may not need to supply any piped input, but we setup the
            # standard input pipe anyway as a work around for a python
            # bug if this is called from a Windows GUI program. For
            # details, see http://bugs.python.org/issue1124861
            #
            # Using universal newlines is important on Python 3, this
            # gives unicode handles rather than bytes handles.

            # Windows 7, 8, 8.1 and 10 want shell = True
            if sys.platform != "win32":
                use_shell = True
            else:
                win_ver = platform.win32_ver()[0]
                use_shell = win_ver in ["7", "8", "post2012Server", "10"]
            child_process = subprocess.Popen(
                str(self),  # may raise ValueError via _validate()
                stdin=subprocess.PIPE,
                stdout=stdout_arg,
                stderr=stderr_arg,
                universal_newlines=True,
                cwd=cwd,
                env=env,
                shell=use_shell,
            )
            # Use .communicate as can get deadlocks with .wait(), see Bug 2804
            stdout_str, stderr_str = child_process.communicate(stdin)
            if not stdout:
                assert not stdout_str, stdout_str
            if not stderr:
                assert not stderr_str, stderr_str
            return_code = child_process.returncode
        finally:
            for handle in opened_handles:
                handle.close()

        if return_code:
            raise ApplicationError(return_code, str(self), stdout_str, stderr_str)
        return stdout_str, stderr_str
+ + Attributes: + - names -- a list of string names (typically two entries) by which + the parameter can be set via the legacy set_parameter method + (eg ["-a", "--append", "append"]). The first name in list is used + when building the command line. The last name in the list is a + "human readable" name describing the option in one word. This + must be a valid Python identifier as it is used as the property + name and as a keyword argument, and should therefore follow PEP8 + naming. + - description -- a description of the option. This is used as + the property docstring. + - filename -- True if this argument is a filename (or other argument + that should be quoted) and should be automatically quoted if it + contains spaces. + - checker_function -- a reference to a function that will determine + if a given value is valid for this parameter. This function can either + raise an error when given a bad value, or return a [0, 1] decision on + whether the value is correct. + - equate -- should an equals sign be inserted if a value is used? + - is_required -- a flag to indicate if the parameter must be set for + the program to be run. + - is_set -- if the parameter has been set + - value -- the value of a parameter + + """ + + def __init__( + self, + names, + description, + filename=False, + checker_function=None, + is_required=False, + equate=True, + ): + self.names = names + if not isinstance(description, str): + raise TypeError("Should be a string: %r for %s" % (description, names[-1])) + # Note 'filename' is for any string with spaces that needs quoting + self.is_filename = filename + self.checker_function = checker_function + self.description = description + self.equate = equate + self.is_required = is_required + + self.is_set = False + self.value = None + + def __str__(self): + """Return the value of this option for the commandline. + + Includes a trailing space. 
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    Attributes:
     - names -- a list of string names (typically two entries) by which
       the parameter can be set via the legacy set_parameter method
       (eg ["-a", "--append", "append"]). The first name in list is used
       when building the command line. The last name in the list is a
       "human readable" name describing the option in one word. This
       must be a valid Python identifier as it is used as the property
       name and as a keyword argument, and should therefore follow PEP8
       naming.
     - description -- a description of the option. This is used as
       the property docstring.
     - is_set -- if the parameter has been set

    NOTE - There is no value attribute defined by this class, see is_set
    instead.

    """

    def __init__(self, names, description):
        """Store the switch names and description; the switch starts off."""
        self.names = names
        self.description = description
        self.is_set = False
        # A switch is never mandatory.
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space when the switch is set, otherwise returns
        the empty string.
        """
        # No assertion about a stray ``value`` attribute here: the generic
        # clearing code in AbstractCommandline assigns ``value = None`` to any
        # parameter it resets, so asserting its absence made a cleared and
        # re-enabled switch fail to render. Only ``is_set`` is consulted.
        if self.is_set:
            return "%s " % self.names[0]
        return ""
class _ArgumentList(_Argument):
    """Represent a variable list of arguments on a command line, e.g. multiple filenames."""

    # TODO - Option to require at least one value? e.g. min/max count?

    def __str__(self):
        """Return the space separated arguments, with a trailing space.

        Raises TypeError if the value is not a list and ValueError if the
        list is empty. Items are quoted with _escape_filename when the
        argument is flagged as a filename.
        """
        if not isinstance(self.value, list):
            raise TypeError("Arguments should be a list")
        if not self.value:
            raise ValueError("Requires at least one filename")
        # A trailing space is required so that parameters following the last filename
        # do not appear merged.
        # e.g.: samtools cat in1.bam in2.bam-o out.sam  [without trailing space][Incorrect]
        #       samtools cat in1.bam in2.bam -o out.sam  [with trailing space][Correct]
        #
        # Items are passed through str() so that non-string entries (which
        # _escape_filename hands back unchanged) are formatted the same way
        # the single-value _Argument formats them, instead of making
        # str.join raise TypeError.
        if self.is_filename:
            return " ".join(str(_escape_filename(v)) for v in self.value) + " "
        else:
            return " ".join(str(v) for v in self.value) + " "
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    Note this will not add quotes if they are already included:

    >>> print((_escape_filename('example with spaces')))
    "example with spaces"
    >>> print((_escape_filename('"example with spaces"')))
    "example with spaces"
    >>> print((_escape_filename(1)))
    1

    Note the function is more generic than the name suggests, since it
    is used to add quotes around any string arguments containing spaces.
    Anything that is not a string (for example an integer NCBI BLAST+
    -outfmt value) and any string without a space is returned unchanged.
    """
    # An alternative once considered here was asking Windows (win32api) for
    # the DOS style 8.3 short name of an existing file, which has no spaces;
    # that name is not portable between machines or folders, so plain
    # quoting is used - it works on Windows, Mac OS X etc.
    needs_quoting = isinstance(filename, str) and " " in filename
    if not needs_quoting:
        return filename
    already_quoted = filename.startswith('"') and filename.endswith('"')
    return filename if already_quoted else '"%s"' % filename
zcmeHwTWlOxnqF1k*lbc1Ez7d4@yInCYst3BwxrqdZ7hv$ld1zUG$&2LkI!-R*8~#(< zvOH_m@@y~HvR7^S&8_C-H@}*f-@d*)7A-}$+@GmbX?vAL??>Ytmd30(afUU`$=F>lH{?oE3y zcqhCUy_4Qc-YM^8?-lP=@3i*~?=|n6-Wl(8&+)$H&3NDT-tf+Pf8fn}b6(YZbK~{Z ziucxry*lxc<;{BwUs~RRU)Y*_kY7E9J2mfZ+cF0Q{T*Dv7u9q(OSzbn@#aQ&Y5KCa*QPoVsZxcVaijUT)c{UUuKfTh>Q@+Yj89v*R~5-F7qF37x3p zxQ&J%hK{?t+iEu4sM%?UPP6S?ZFY7KqRmd*nY(#)`R1pKD~r`yrLwZw44s{h*KPR@ z&f1*_g(JW1`JS`h2^_B(_>IWvhJMZYByx6x&R)~=(b1;wthr&c;dIuvP$G2pH=B*k ziW~S&*j;P1+%QBFYX?rJ8#s;5&W_vmoK~~#JNp5~<_FMDjJMNK3UvZ>xALRN6Y@buHSAn+Z%(AF>62c^|ZreQpvy^HTY2GKQQvB z(`gBeH~rSGbI|El8h~yTbOE7(g1CoHLry>RqX_2$usyfc1?)QOj_+dTyKdm__>mvf zI3Q=ei8b5=&_*^zzyj!PEK#uIwps^Rr@hWLitjaDwH)1MD_Ur_E4S1->s7^SVAY5d zSnlgxw9=8{z=VwsN4Dc)#qo994XkN5@X!fXe%tqV`FLP78LX-#>*#cM*{0`4{=BpA zJDcvF?|6O#c;d+ee-}7}rw&oT-}Bp1S0)hnI~^3otafUZJHS7{(sw<~aNj`<;0*8( zBWkuh4g~NX!X&mQP~hWcq>;6CP}y%rn=;9C@_Myvm0y$HE%jdh8$WRudRG=!ztF$u z7T&La@#Y(`9mR#n52LtP=TEP=AU_qieXy{3u!|*FJ=O(%1Yx5S`1Q{A>NpG3+gQuh zB2O>Gd5kR1dwwe}tOuQ)I1jpti=dE}ALraKF8HnHdR*FQb=KTgoa+Q}fkTcv0~m9+z6~&YI`O`6xJuj{$M(216SJTJizw zQF7uxgGJc@xlr-)Ug1j{lA$QvtLTk+WpJ^QHx8~f5l?4Gdoc()fxDcutlJ!> z2L=MfbMY@Ff1m-!2yO+3DwPP+Rf~|Gt2K#H?wIsg12*MK;2qOmRNNS@MD26c!xCP47R)je&%lYM_X4A zjjcveqbq-bWY#r&!`E==+kLBU^>aAd{XCAjegQ{lHNeqa@1;-L&8X?Nnmu7nN*dIb zs)e|cf+WrZ9C1m3LVh`mVq6IQ)_O3DUV=FusyvWP{)H9H;vv*o^#zUAM7~X@++p9pBC@?A!G^q(xM( z*LDx$a&obb9>OvXR<308ulH)E&LUU3mXvKk$CD3X!#V<=HJlur>04XYL;I2SP&B?r zIWPaJ^)Ua)I<)Ut?NZd)V96_@F2AL|y`Dd1En8kG zxwD>=>oGvN-23JYim^z@44?xFf+~RZLKn5zuKb$g3*_(zIP1MN5)H^j`m(!b{aLSrxE6_P84tLugCjsDZj1Ula}tVocYvnhV~-K;ryPxFKt)4hy_D zR~g;M%812~YGy4sUE-^(#Va`RfU8>;zWfEo zZspetgd*_NxlOqmLYh7Xh`pnn;B0f=ISa*0eXG?%I=vKCtF}yZc#gob06o_0FdM8; ztcaYNT46MvsnSb;8y8Rx)iEL~C1t%HR{$?jJaIl*uit}Gpq~JoUZ;WnYOMGU5AX6o z3d$S^`rs@MpW!QbUp`*QRmRHGa<_rI=0m7n#y8x=VL)c(Agw61tkt5HS|u&D#=LQ_ zg8OA}!kfhJIPDAkR$x)U%7C1kprwI3le9MQdkmHaEDk)G5LhlqYB-0XB)UVAp8$ZLqCFgnrq zs355VA`nv@J?aSm)~Y=vF!YL+59JF&mhy-LbDeM(D&0Cp?56Tr%O21&3Uq}b`}bYe 
zh9Wo%(TfiEoeq?KNHI#(#UEb1xxAcc+F{Lce%$F|sBP-WGK|iyGFe0$Xh7dF+OyU- zwZVbz2%~NP05T0?IN0cN#33w+juJ>MsGNZ=hQ6DGZUBne1+!&UOc-WJ)bzyw0VpZ? zgqzS58DUlsF3`7}J8U?!o-%G$Ziszz@koKW4R;rPwJ)AOH}Bva!WNqC#dEdqrm|4# zN=w7d&q35&oZV-q5T{RQN@^$YOq6NZUCrGtVId1(G`cXc8vvIS5|HY(jf_SiqpImX zrTz2g2B(`mgyL~w=+QIG=E70H49up{@?GMX89uE6WofuAAc=S2xE}2dSX{HvyM1EK zEN3juj9mBw+MRmYQAMp@G9B~eC!|9Khg*!$QnX z!%C$&*Maff9I~y+*w{4fdaxqJ#sE0*FOZAoWb-MO{6@?{2)Is@;rSSZsb2#`#INC_ zr5X5O!Vj)oyLRoPk3PDFzmHaCWWvPKw94A(Dp#IRC0S4O5&1j2b!n^?!A>ykEB)Q{ zqR>h%odsR};$U58!3_T5(UCg~W}KO)$snDDl=sPbN?B$^W-M~&&Jd#>Dx+(MBo52cg^<&Z9f~ax zi!#aZ0A7j0 z4k)Neo-~Rn(v(sIr33@dPXCQtS5`g-m)UCuoi-H#(0kA%&#A#Fz{+%P(sSlW%g(3p zh}?hsvv%`7*0Za0j~(|w7%yiF4a%e90{N-DRjS&ZX4?b*sX153$l6$jJ->C3a%!a| zr7i@4_^wNLlybYR!37gQS}7OmdJ`6hVPXyuG|RF7>KINi149&UgblbLof}});a#Ph z-Ic|v!HG08F8~JrCqI7tC#A|E<>o2tr_?rznLTAADM@A;Nl5Z(XOeJ)VTPA#X(Kt_ zQ-Jb!%@1YeKxIv+8qp@`49c>c`cR>Oj1WQG@m1@v6jdHh@IE6@ z!M{Zf)Ydgh(Ii^=&$u_1xpxfr{sQ;P$vtHe9*!xC@UW~b!ozX&F?Cpx@g9G`XwB(< z#XBxNPrquNvfi~0EA11w_rl=4Xu6O79@u^KfFnkYW932Na3XpE@H>IoO+3hB9+&P| z_h}+50O)fu- zFc<9qXe>tCRJGpxvbGBoQ&Btf;W&m?Mdwz{sNmsJuex|2Rw1wg4j9ceY3Lp7`c8eB z)??#}w#$0ozLi8f94KPIva^Ugf$cYUA^ac$*uk&=H$J^vH|V9#sQQAe=m459<5S?w z;)ErprV1c+N^Qr0i7WJWRK%;-{w=k*hA;-YCY?Z>xKKCY$=k#1#RP>ms{1?mE%oQ6 z(VBgTCd%p&?gZtbk*xy z_EOc3r|J;$X(Rx5?E5$r2xH}p5f@nbLlr%^^b%M=h<2>rr4fTQ{>R#ftqwGw@KO!U zLzJf1Nkh^&oKct2*1);+S5nnZvNvHxc5nV!`Wp>^QIJX)9e)e>8jvbT_3cLt$r4t< z8+Z(V*Z1)g7bV)49e*GWt1Ec;%Co`<2G;YyNL73J*sY$sL0ZjLdLEn;2r58r)qXHA zzYr0>#^$^ekyV4X!BI?IvNmT8YOn9MYvKg&9qRVKu35vLCF0bCLkvoFr_qEsK&Y5D zJk+^mQQNaUYV9;}qp6a{o~~72iYGBTYK!8&jVq>~FrK=S1kuDj5a)qCagqEXj4MrO z9yDMYetdjLCdA__kQAy;C3P<#dg^)cF?gHn1c>Ud2!*7vi6#=BE4Rb=rMj?@;oB2X zYZ`Fu6Xx*FFAJ>WasuFZ3`1n-FCMQ0Aps%ypo<8&?1YMjr<4nP4(J@0w)X*+F!-1k zVzKkO9Y0fU^%KXil-$FUhzmk;B+0%$RNYTQf0-3%G!Gg_whAS9agtvV$^?xv(4|LWZ+~m!n1_nphbcM;^?t(ED#cbhl}B zsw@SAIH?p5mA6hbNdVuOyLJ1M&#$N?l*G1Jf{`f!UyXhXl(gE#B;t)iUV_@Bj}&9H zPReEyThE2ELKKA(gcOU-4kEMKAucwRj@MM3kX9M>9TBJCS87|eQ2*&1YlBsc-8txsGnS{APlHvomB|W& 
zo+ss4OA0GfGvlx212N8j*Ty6}=7%daWTL`_z1U!9D_9$$YTPC_A=;O&8vX!Lx$STt!kzg5!=%n+gJqKXG)}DJCrIc}!cOL1gF^N= zwMwNs7`NCF34Fh%)v7l@-8SgehI6SLfvVvELyiRQ6qb>ev{hWT?m@ANN>sO$q9w@$ z`Cj!S$W!CN5!tJxtJarNovQcEo1*(FPY4*~K1>QaPu4r|VS}>TqS+!!?-?}96@X>{AO6a)1bT0b1mTeN z+#ulip^C__0P)Xo0wCt^|B(h_bO`YD3krxpxq_hFkNP?Bgno?tmt3nruj|7i0Eu$+ zm!5(X$Oalx9qbqCV_xYne{WLij0byAD9gy9W2@tj@;3A73iS#+s-`5CCLjK3Gy#ul zc4^8kW#aPVFqt9;aef_{IFju`#;^#b{2;X~#22^cB6pJ!W)(6^3_US|#hRu$>KV3| zq>UWSGD)n}n)8_?m}v7u+3ocGg{Bcm(RFYT*wtb^!wzWq8*W3?-qgX7*dLM~uZ)F# z(?pVrkC-E(X|3iqf=SwudVTTM?Zs=bkgls|Fa`qW_Tmq3evUiH@+KtsS?@C06ha+n z9{Nb~R<{w(UQ(WeWybkj?Qc-dS!3q2N67|5@XZ%rE=nej2ggAg9cQe36qqL7V-(QXEgLaYbp7BkU-1Ic1Ivudr9Ys9@3iZfwF zQH?=GZPmNZPmt$x{tIVr%|&+0MyKOd^;CeOV%P)MRZ4QrSyZ_t@O+zg#7mZR=ty$+ zU@Zsf*ECNBffTVZ5)50KStzC*L>sWx0zomPsL)RB5WM@yLQkm&3BM})^c+@l__UNh>fXsIHu3 z)->`_?GLT?BqRV7lz#3JqEAXeaXnNL&Ym~~mHq+ts1_tuP*ffOS(SKfB-Pub9n!l) ze~%kbThaN4z(c^) zPr15amB9?|rrPD6sG&tf5-s?x=WKFLR;fa7En zp45=DSx$PkPfBev9SWgj$U+!12xSv~>sAwKB)~Rpx0_upKvOCfKqcYfrmf*}P(7H3 zlH|zDCrHU{f6h&E$CQvjoq@`|A^ZfbL06yx1ZGpTRvl}ZA^H(98u4R{B^91pIWprS zM-)m^t_hAkCD@cX!*@z=yqJ4PFlSP^*KA-rg!0}i$BwdFgozHhS0^$I;@^{9ii7o4 zINNi**GDqRl%cFNq%eAC;sZlm>>>OZ&A<{l5m^zO<%i>sphY0bVZ1-?6~U_iRGt)|w3pFeCBhb$2k`A;V@9dZWQsz+*hjL%FUp6N zKGP!XM>diQCJv$ChkxFm2>v{pMCt)GlzjNB{$%hgTu;h%W$PGH4|2i(OlF642c%v+ zoT7iZpU0Jx%mn9iG8f*R>yPO&$8dEyI^I8~M%AA{%LVj=-*v`P%kH~(jP0u3p4o=6-*ufm zDcvRI>DjGlvT>z_EsZUr``{KXQB1s=q8{oNPPES?S4rbH2v2UVdH@YEwuhuv3JJ|U zgm@*5F2nCvFcW1~B25AXGik6%+6G7w8ox>n23tTRK^O%#o$4KNW&nc}s$jzXY3P|L zhs`!}d7scXWo|miG?R^~bQnb9hQ+=GHJWNF(vWqRDqbSiBorz#8l%@%h~{8A?3w6y z=oaHLlSub9Bzy13%QUQ=Ir@#}lRQI}j|?3^b_B`}UD*#{8dFH{w&(A?-DbWdP|e-z zAa@voKsF=nVBabX(778(U+}AfCu$oG3d)R4Q^Zx(5#qeu-iSon2=<{Y%v@NNStrZ? 
zI4C#v!31?04HZqZurLLbK7r(DXiGd-+qE$|GX#u&IJJ}!u$r^%`#Pa?7N9blgp+hp z#4%+(*%|YJ&W=^kg5A!2YtOUOBx%58)jm_)E2Y1royYzv9wh1=*c*}p)u{o?V=E>P zpm3{1^@}sER@bu0(=&;#mnx5}sgx>s5JjIC^o5NT&Nyd5fpCH_hkmdI23l7fL3X9; zbAZK^PZE#WzUWbjDFxFI1PQ9ryMKvtDKXTPsi)eWBdKH4yn5?`fdVW}rb_ETucD5G zTQPi0@)MRWD5{e0=81TjeoSMEep?3_y1GNY!Mcf0OvkacTmn02C;MjMi~@Zo^2li* zd%Lq^+8*A0I9gl@bkfYwHn~-@f!Q>LTjd}>rdu836WuoaDM9GA>fkK&sE63vyN}KLU`!~^ z-OSbLxWL_8*ry-H$Q0V6R8XM>AF5wmN%18v-1_9!B7%$uA(Bn7=?eUECmxp();W&i zEon(Yj_8_iML_zDZ2AXtT;%%6mP~Hu@4#w2iw>$f8D$UW>Cl7wyga}Gt}j`kI)sFr zV`=1k97tA|DXflR2pqubu?Aj3;4!g=EYwtmL^4%*KB2IyPP=@_$v|bBC|IxmSE|mp zFaqnXf@G;o=1TB1Fkht%KN_wk=ucx73-1@;Rihsw`77tAa6KvCQgIr7x7U!_QhGw> zit3iBcEzQYm8GTXaTWD>pP=}Fhwt$qrh#l7ZSXSPKgxJTK(D))2?4z&!7dL!;~_N? zWS5HE6I}W{p^ZP>;y;J>|n1*vrTN$hLJGG?0z^(=7G?V zW+B%*689m$iI`)SaF}^#hSZyfy+TMYFZbcS+mCOxc?qmES z(#=0fHjet9;4&Oq5)w&4YZ9P728@!#&2Jo?zR44P%I<@|jR8HTJ0idjGTogX*&Wcn zUSFz~MxvBplOR$$l@v>Ujt;J}1Db~A0&pZB@ZJDU(4Tf0-*5(p0i1xrhPP$#tr_ZV z8AZvdsvvJoIV9X0X!mv!w5Oq=w0|rp_#)k{(6pgc3EMQW6TSYz&xt>w^V!lTKcd@H z)kd^}&i{o4_?@^L^#E>OYVr%NgHxp@!0QBEPxj6s6?smsX7F^TN|C80A^fL_&r|n# z1GqT4v+d?iH*kS^z->q;x3#aG#@K5!tMvJZiDbGHE2~U z+_|Z|u4sfsl7`%*m6^>hHZ?(EaXm)NW>jgFFp1qsYWt|hFtkC|0P_NtEKwRy`sy)}nJdxQF>+sw?vYA^uuVhy{t0Ue<+he`M zH<>uM7H_?m3fFjGSJV_XE zK?N%^F*F4aiCp5>95!9OAql?-@iXB!iA~B2a}dcyN#v&CNPnVL zAA>j1s+ZwcwCdv?vQ3c+K+T}A)cfYL!EIMt3=%RU)|?F`kbI*$p%N-S#V7)whb0_h zCeS7n6tsDpZ~DOduVg6$mv`29;1rP3BD#j8v`D$~U|lKx_gFlY;YJ+^D8A**E#oTH zN4-D?M`)zIX)TG5Y9*+aCa-WTjE^B!Rt{c4vI=&t7-j%~Q=|~Iaj->PT+}kRbZ(y;71U27QBx&RW4I3>hDIY&cH~!(> zBuzYp!;x%5V;_nvNm`{!ASfaA3MoWA{9?r%s7QpjTCu;&Gmi&ZHiB0tHplkfFrcYG z{hWZF#)&n~w05MfGw5!g$n}n=Ko(uBTDY-vW97LNvcH4oZ?dCFoRq&+C_!<>U*&kf zCk=efhYNTa-=|d7C7aCsSNV|&8Qigk{j2RA&!>_-vz7!rP=P_6t)lIB<26gv1SQ0T zXHXvPG_kAb_exX(9ib9Y#eT0u;Wav0oe6@)9k}sZQKFt!#tFhM-CSA3s8+-XfqZW# zNd?B^1jiD5+PYC~A?UYv3}iNti*^*w!x53G#hou`x12Gas2^-3hBsDU9 zG(x8QJ8~cz%9i94i5yWok&em{8rEPcYFuyP1VKN@5rht>A_n0oJ!V9rAn}p8d3dv0 
z0beAGR!hGFc_1u5l?n1TFTN9R49W->AoyDtvw{y{@&rr#KNzSK$^NIv_zhHq@jsst z{yDn+XKY)H@KZ3tOTw%hxNklL@nw9E8sXSJ4wnp#aJ>FYTr=F*G>)w}S9I*fEn$%c z%y7f(kwrhs_X1qi=B6o`QR$UsmWlb%(#o0{V!9{Ft(K^v6Jy6Qef{>x(*tFeTw% zVDw7O6jgBqMgJG#*9jD}CWf>`y9XbhMwG9i?sEhZwL}YA;*kzN6qUAwDv2GmpKa1ec)#mYI`9&PNit&p=dpqp7l$#{GXCO z%415;s!RvGKR|aVV_n8q{HO!YFT7k*IYcqZB6fWyK>QR5DZSena)VW)Ug%;RItfuB z_bx+!c-%&`SKf!EUoen~L)QlUiS)@` zs+Y&x&c%Pr^4FowA$Nbw(xrC zCm`qlV7F5!X(0%J5P@ATmh97b0qe*ia95>l@fdRU)27HwO^8;kx_^iV|D9cMk&#hf z2zX~SRyuex8Y{DI`rwBs*87)PoP-M#FS&5$=uE@9wGJ54S8pO5K+22(CR|j~0ya&vW42=1y_b)6DQNtBiUn6a&(=k;D2igidxk0pbT`JB$VnpTZsAm) zTmiE|+V^RmBWD(SE*{yz1)O2mg*qeW=o_7`T6F&sO)kntM#BiOg$F6F+PgHNTJ$G}|^e<$crayTTE0YXMZMS0D|F&$##Bi`qZ z6eNjQC*D?6qDxR*rzuQzvNB#1ua!7MyYxk-iCr1@Y(VDzl|9od%pkvk8Rx=M=}CzZ z)OCZ1UWCoKjQEV40snu2hr@(NE#M?ERJiGDO!o%E%bm;>av}v9crKp-Ol3$@{s#~1 z5O_&HlUMkKt#i4#fl9xTjyWos+yR{+grp&O#reAXEb@uNXepqIgOK1DBF^~F8GJ~g zFMJU|+h?^u;o*;Y_#+(RT!6RR zgB$K5qd)lTynUMoF-n$sw!%^;uF31;Ze`w{E)jMK;7Y_!{6lKTAaxiK8N&9JkBfs+ zQYcjpjl_w|AF4#5OXRO%4+pM;{wvbkH6<@*I3Z_Kd|AVD|CREk{D})wf79aMrKw}n UOVe*o&rP44exY0~k5zL22ie2T!T 4. Added in BLAST+ 2.2.30.", + equate=False, + ), + _Switch( + ["-html", "html"], "Produce HTML output? See also the outfmt option." + ), + # Miscellaneous options + _Switch( + ["-parse_deflines", "parse_deflines"], + "Should the query and subject defline(s) be parsed?", + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! + self.parameters = extra_parameters + AbstractCommandline.__init__(self, cmd, **kwargs) + + def _validate_incompatibilities(self, incompatibles): + """Validate parameters for incompatibilities (PRIVATE). + + Used by the _validate method. 
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, etc).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared BLAST+ options and defer to the base wrapper.

        ``cmd`` has a default only so that subclasses can supply their own;
        it must not be None by the time this initializer runs.
        """
        assert cmd is not None
        extra_parameters = [
            # Input query options:
            _Option(
                ["-query", "query"],
                "The sequence to search with.",
                filename=True,
                equate=False,
            ),  # Should this be required?
            _Option(
                ["-query_loc", "query_loc"],
                "Location on the query sequence (Format: start-stop).",
                equate=False,
            ),
            # General search options:
            _Option(["-db", "db"], "The database to BLAST against.", equate=False),
            _Option(["-evalue", "evalue"], "Expectation value cutoff.", equate=False),
            _Option(
                ["-word_size", "word_size"],
                "Word size for wordfinder algorithm.\n\nInteger. Minimum 2.",
                equate=False,
            ),
            # BLAST-2-Sequences options:
            # - see subclass
            # Formatting options:
            # - see baseclass
            # Query filtering options
            _Option(
                ["-soft_masking", "soft_masking"],
                "Apply filtering locations as soft masks (Boolean, Default = true).",
                equate=False,
            ),
            _Switch(
                ["-lcase_masking", "lcase_masking"],
                "Use lower case filtering in query and subject sequence(s)?",
            ),
            # Restrict search or results
            _Option(
                ["-gilist", "gilist"],
                "Restrict search of database to list of GI's.\n\n"
                "Incompatible with: negative_gilist, seqidlist, negative_seqidlist, "
                "remote, subject, subject_loc",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-negative_gilist", "negative_gilist"],
                "Restrict search of database to everything except the listed GIs.\n\n"
                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-seqidlist", "seqidlist"],
                "Restrict search of database to list of SeqID's.\n\n"
                "Incompatible with: gilist, negative_gilist, remote, subject, "
                "subject_loc",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-negative_seqidlist", "negative_seqidlist"],
                "Restrict search of database to everything except listed SeqID's.\n\n"
                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-entrez_query", "entrez_query"],
                "Restrict search with the given Entrez query (requires remote).",
                equate=False,
            ),
            _Option(
                ["-qcov_hsp_perc", "qcov_hsp_perc"],
                "Percent query coverage per hsp (float, 0 to 100).\n\n"
                "Added in BLAST+ 2.2.30.",
                equate=False,
            ),
            _Option(
                ["-max_target_seqs", "max_target_seqs"],
                "Maximum number of aligned sequences to keep (integer, at least one).",
                equate=False,
            ),
            # Statistical options
            _Option(
                ["-dbsize", "dbsize"],
                "Effective length of the database (integer).",
                equate=False,
            ),
            _Option(
                ["-searchsp", "searchsp"],
                "Effective length of the search space (integer).",
                equate=False,
            ),
            _Option(
                ["-max_hsps_per_subject", "max_hsps_per_subject"],
                "Override max number of HSPs per subject saved for ungapped searches "
                "(integer).",
                equate=False,
            ),
            _Option(
                ["-max_hsps", "max_hsps"],
                "Set max number of HSPs saved per subject sequence\n\n"
                "Ddefault 0 means no limit.",
                equate=False,
            ),
            _Switch(["-sum_statistics", "sum_statistics"], "Use sum statistics."),
            # Is -sum_stats a BLAST+ bug, why not use -sum_statistics switch?
            _Option(
                ["-sum_stats", "sum_stats"],
                "Use sum statistics (boolean).\n\nAdded in BLAST+ 2.2.30.",
                equate=False,
            ),
            # Extension options
            _Option(
                ["-xdrop_ungap", "xdrop_ungap"],
                "X-dropoff value (in bits) for ungapped extensions (float).",
                equate=False,
            ),
            _Option(
                ["-xdrop_gap", "xdrop_gap"],
                "X-dropoff value (in bits) for preliminary gapped extensions (float).",
                equate=False,
            ),
            _Option(
                ["-xdrop_gap_final", "xdrop_gap_final"],
                "X-dropoff value (in bits) for final gapped alignment (float).",
                equate=False,
            ),
            _Option(
                ["-window_size", "window_size"],
                "Multiple hits window size, use 0 to specify 1-hit algorithm "
                "(integer).",
                equate=False,
            ),
            # Search strategy options
            _Option(
                ["-import_search_strategy", "import_search_strategy"],
                "Search strategy to use.\n\n"
                "Incompatible with: export_search_strategy",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-export_search_strategy", "export_search_strategy"],
                "File name to record the search strategy used.\n\n"
                "Incompatible with: import_search_strategy",
                filename=True,
                equate=False,
            ),
            # Miscellaneous options
            _Option(
                ["-num_threads", "num_threads"],
                "Number of threads to use in the BLAST search.\n\n"
                "Integer, at least one. Default is one. Incompatible with: remote",
                equate=False,
            ),
            _Switch(
                ["-remote", "remote"],
                "Execute search remotely?\n\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, "
                "num_threads, ...",
            ),
        ]
        # Insert extra parameters - at the start just in case there
        # are any arguments which must come last.
        #
        # Should we raise an error when the subclass has not set this up?
        # For now fall back to just the shared options. The lookup is done
        # with a None default because the unset state may surface as a None
        # attribute rather than a missing one (AbstractCommandline, which this
        # hierarchy presumably derives from, declares ``parameters = None``);
        # ``extra_parameters + None`` would raise TypeError, which an
        # ``except AttributeError`` fallback never catches.
        existing = getattr(self, "parameters", None)
        if existing is None:
            self.parameters = extra_parameters
        else:
            self.parameters = extra_parameters + existing
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject incompatible option combinations, then run the base checks.

        Raises ValueError for a conflicting pair, or when entrez_query is
        given without remote.
        """
        incompatibles = {
            "remote": ["gilist", "negative_gilist", "num_threads"],
            "import_search_strategy": ["export_search_strategy"],
            "gilist": ["negative_gilist"],
            "seqidlist": ["gilist", "negative_gilist", "remote"],
        }
        self._validate_incompatibilities(incompatibles)
        if self.entrez_query and not self.remote:
            raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
+ """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + # General search options: + _Option( + ["-gapopen", "gapopen"], "Cost to open a gap (integer).", equate=False + ), + _Option( + ["-gapextend", "gapextend"], + "Cost to extend a gap (integer).", + equate=False, + ), + # BLAST-2-Sequences options: + _Option( + ["-subject", "subject"], + "Subject sequence(s) to search.\n\n" + "Incompatible with: db, gilist, seqidlist, negative_gilist, " + "negative_seqidlist, db_soft_mask, db_hard_mask\n\n" + "See also subject_loc.", + filename=True, + equate=False, + ), + _Option( + ["-subject_loc", "subject_loc"], + "Location on the subject sequence (Format: start-stop).\n\n" + "Incompatible with: db, gilist, seqidlist, negative_gilist, " + "negative_seqidlist, db_soft_mask, db_hard_mask, remote.\n\n" + "See also subject.", + equate=False, + ), + # Restrict search or results: + _Option( + ["-culling_limit", "culling_limit"], + "Hit culling limit (integer).\n\n" + "If the query range of a hit is enveloped by that of at " + "least this many higher-scoring hits, delete the hit.\n\n" + "Incompatible with: best_hit_overhang, best_hit_score_edge.", + equate=False, + ), + _Option( + ["-best_hit_overhang", "best_hit_overhang"], + "Best Hit algorithm overhang value (float, recommended value: 0.1)\n\n" + "Float between 0.0 and 0.5 inclusive. " + "Incompatible with: culling_limit.", + equate=False, + ), + _Option( + ["-best_hit_score_edge", "best_hit_score_edge"], + "Best Hit algorithm score edge value (float).\n\n" + "Float between 0.0 and 0.5 inclusive. Recommended value: 0.1\n\n" + "Incompatible with: culling_limit.", + equate=False, + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! 
+ self.parameters = extra_parameters + _NcbiblastCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"], + "culling_limit": ["best_hit_overhang", "best_hit_score_edge"], + "subject": ["db", "gilist", "negative_gilist", "seqidlist"], + } + self._validate_incompatibilities(incompatibles) + _NcbiblastCommandline._validate(self) + + +class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline): + """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE). + + This is provided for subclassing, it deals with shared options + common to the main BLAST tools blastp, blastn, blastx, tblastx, tblastn + but not psiblast, rpsblast or rpstblastn. + """ + + def __init__(self, cmd=None, **kwargs): + assert cmd is not None + extra_parameters = [ + # Restrict search or results: + _Option( + ["-db_soft_mask", "db_soft_mask"], + "Filtering algorithm for soft masking (integer).\n\n" + "Filtering algorithm ID to apply to BLAST database as soft masking. " + "Incompatible with: db_hard_mask, subject, subject_loc", + equate=False, + ), + _Option( + ["-db_hard_mask", "db_hard_mask"], + "Filtering algorithm for hard masking (integer).\n\n" + "Filtering algorithm ID to apply to BLAST database as hard masking. " + "Incompatible with: db_soft_mask, subject, subject_loc", + equate=False, + ), + ] + try: + # Insert extra parameters - at the start just in case there + # are any arguments which must come last: + self.parameters = extra_parameters + self.parameters + except AttributeError: + # Should we raise an error? The subclass should have set this up! 
+ self.parameters = extra_parameters + _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"], + "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"], + } + self._validate_incompatibilities(incompatibles) + _Ncbiblast2SeqCommandline._validate(self) + + +class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline): + """Create a commandline for the NCBI BLAST+ program blastp (for proteins). + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p blastp. + + >>> from Bio.Blast.Applications import NcbiblastpCommandline + >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr", + ... evalue=0.001, remote=True, ungapped=True) + >>> cline + NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True) + >>> print(cline) + blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="blastp", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option( + ["-task", "task"], + "Task to execute (string, blastp (default), blastp-fast or blastp-short).", + checker_function=lambda value: value + in ["blastp", "blastp-fast", "blastp-short"], + equate=False, + ), + _Option(["-matrix", "matrix"], "Scoring matrix name (default BLOSUM62)."), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. 
True).\n\n" + "0, F or f: no composition-based statistics\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence " + "properties\n\n" + "Note that tblastn also supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft2TtDd", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable\n' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + +class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline): + """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p blastn. + + For example, to run a search against the "nt" nucleotide database using the + FASTA nucleotide file "m_code.fasta" as the query, with an expectation value + cut off of 0.001, saving the output to a file in XML format: + + >>> from Bio.Blast.Applications import NcbiblastnCommandline + >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", + ... 
evalue=0.001, out="m_cold.xml", outfmt=5) + >>> cline + NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus') + >>> print(cline) + blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="blastn", **kwargs): + """Initialize the class.""" + self.parameters = [ + # Input query options: + _Option( + ["-strand", "strand"], + "Query strand(s) to search against database/subject.\n\n" + 'Values allowed are "both" (default), "minus", "plus".', + checker_function=lambda value: value in ["both", "minus", "plus"], + equate=False, + ), + # General search options: + _Option( + ["-task", "task"], + "Task to execute (string, default 'megablast')\n\n" + "Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast' " + "(the default), or 'vecscreen'.", + checker_function=lambda value: value + in ["blastn", "blastn-short", "dc-megablast", "megablast", "vecscreen"], + equate=False, + ), + _Option( + ["-penalty", "penalty"], + "Penalty for a nucleotide mismatch (integer, at most zero).", + equate=False, + ), + _Option( + ["-reward", "reward"], + "Reward for a nucleotide match (integer, at least zero).", + equate=False, + ), + _Option( + ["-use_index", "use_index"], + "Use MegaBLAST database index (Boolean, Default = False)", + equate=False, + ), + _Option( + ["-index_name", "index_name"], + "MegaBLAST database index name.", + equate=False, + ), + # Query filtering options: + _Option( + ["-dust", "dust"], + "Filter query sequence with DUST (string).\n\n" + "Format: 'yes', 'level window linker', or 'no' to disable.\n\n" + "Default = '20 64 1'.", + equate=False, + ), + _Option( + ["-filtering_db", "filtering_db"], + "BLAST database containing filtering elements (i.e. 
repeats).", + equate=False, + ), + _Option( + ["-window_masker_taxid", "window_masker_taxid"], + "Enable WindowMasker filtering using a Taxonomic ID (integer).", + equate=False, + ), + _Option( + ["-window_masker_db", "window_masker_db"], + "Enable WindowMasker filtering using this repeats database (string).", + equate=False, + ), + # Restrict search or results: + _Option( + ["-perc_identity", "perc_identity"], + "Percent identity (real, 0 to 100 inclusive).", + equate=False, + ), + # Discontiguous MegaBLAST options + _Option( + ["-template_type", "template_type"], + "Discontiguous MegaBLAST template type (string).\n\n" + "Allowed values: 'coding', 'coding_and_optimal' or 'optimal'.\n" + "Requires: template_length.", + checker_function=lambda value: value + in ["coding", "coding_and_optimal", "optimal"], + equate=False, + ), + _Option( + ["-template_length", "template_length"], + "Discontiguous MegaBLAST template length (integer).\n\n" + "Allowed values: 16, 18, 21.\n\n" + "Requires: template_type.", + checker_function=lambda value: value in [16, 18, 21, "16", "18", "21"], + equate=False, + ), + # Extension options: + _Switch( + ["-no_greedy", "no_greedy"], + "Use non-greedy dynamic programming extension", + ), + _Option( + ["-min_raw_gapped_score", "min_raw_gapped_score"], + "Minimum raw gapped score to keep an alignment in the " + "preliminary gapped and traceback stages (integer).", + equate=False, + ), + _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), + _Option( + ["-off_diagonal_range", "off_diagonal_range"], + "Number of off-diagonals to search for the 2nd hit (integer).\n\n" + "Expects a positive integer, or 0 (default) to turn off." 
+ "Added in BLAST 2.2.23+", + equate=False, + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + if (self.template_type and not self.template_length) or ( + self.template_length and not self.template_type + ): + raise ValueError( + "Options template_type and template_type require each other." + ) + _NcbiblastMain2SeqCommandline._validate(self) + + +class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline): + """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database). + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p blastx. + + >>> from Bio.Blast.Applications import NcbiblastxCommandline + >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001) + >>> cline + NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001) + >>> print(cline) + blastx -query m_cold.fasta -db nr -evalue 0.001 + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. 
+ """ + + def __init__(self, cmd="blastx", **kwargs): + """Initialize the class.""" + self.parameters = [ + # Input query options: + _Option( + ["-task", "task"], + "Task to execute (string, blastx (default) or blastx-fast).", + checker_function=lambda value: value in ["blastx", "blastx-fast"], + equate=False, + ), + _Option( + ["-strand", "strand"], + "Query strand(s) to search against database/subject.\n\n" + 'Values allowed are "both" (default), "minus", "plus".', + checker_function=lambda value: value in ["both", "minus", "plus"], + equate=False, + ), + # Input query options: + _Option( + ["-query_gencode", "query_gencode"], + "Genetic code to use to translate query (integer, default 1).", + equate=False, + ), + # General search options: + _Option( + ["-frame_shift_penalty", "frame_shift_penalty"], + "Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).\n\n" + "This was removed in BLAST 2.2.27+", + equate=False, + ), + _Option( + ["-max_intron_length", "max_intron_length"], + "Maximum intron length (integer).\n\n" + "Length of the largest intron allowed in a translated nucleotide " + "sequence when linking multiple distinct alignments (a negative " + "value disables linking). 
Default zero.", + equate=False, + ), + _Option( + ["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False, + ), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics for blastp, blastx, or tblastn.\n\n" + "D or d: default (equivalent to 2 )\n\n" + "0 or F or f: no composition-based statistics\n\n" + "1: Composition-based statistics as in NAR 29:2994-3005, 2001\n\n" + "2 or T or t : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence " + "properties\n\n" + "3: Composition-based score adjustment as in Bioinformatics " + "21:902-911, 2005, unconditionally.\n\n" + "For programs other than tblastn, must either be absent or be " + "D, F or 0\n\n" + "Default = 2.", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable.' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + +class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline): + """Wrapper for the NCBI BLAST+ program tblastn. + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p tblastn. 
+ + >>> from Bio.Blast.Applications import NcbitblastnCommandline + >>> cline = NcbitblastnCommandline(help=True) + >>> cline + NcbitblastnCommandline(cmd='tblastn', help=True) + >>> print(cline) + tblastn -help + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="tblastn", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option( + ["-task", "task"], + "Task to execute (string, tblastn (default) or tblastn-fast).", + checker_function=lambda value: value in ["tblastn", "tblastn-fast"], + equate=False, + ), + _Option( + ["-db_gencode", "db_gencode"], + "Genetic code to use to translate query (integer, default 1).", + equate=False, + ), + _Option( + ["-frame_shift_penalty", "frame_shift_penalty"], + "Frame shift penalty (integer, at least 1, default ignored) (OBSOLETE).\n\n" + "This was removed in BLAST 2.2.27+", + equate=False, + ), + _Option( + ["-max_intron_length", "max_intron_length"], + "Maximum intron length (integer).\n\n" + "Length of the largest intron allowed in a translated nucleotide " + "sequence when linking multiple distinct alignments (a negative " + "value disables linking). Default zero.", + equate=False, + ), + _Option( + ["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False, + ), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. 
True).\n\n" + "0, F or f: no composition-based statistics\n\n" + "1: Composition-based statistics as in NAR 29:2994-3005, 2001\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n" + "3: Composition-based score adjustment as in Bioinformatics 21:902-911, " + "2005, unconditionally\n\n" + "Note that only tblastn supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft12TtDd3", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable.\n\n' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Switch(["-ungapped", "ungapped"], "Perform ungapped alignment only?"), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + # PSI-TBLASTN options: + _Option( + ["-in_pssm", "in_pssm"], + "PSI-BLAST checkpoint file.\n\nIncompatible with: remote, query", + filename=True, + equate=False, + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + +class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline): + """Wrapper for the NCBI BLAST+ program tblastx. + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastall tool with separate tools for each of the searches. + This wrapper therefore replaces BlastallCommandline with option -p tblastx. + + >>> from Bio.Blast.Applications import NcbitblastxCommandline + >>> cline = NcbitblastxCommandline(help=True) + >>> cline + NcbitblastxCommandline(cmd='tblastx', help=True) + >>> print(cline) + tblastx -help + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. 
+ """ + + def __init__(self, cmd="tblastx", **kwargs): + """Initialize the class.""" + self.parameters = [ + # Input query options: + _Option( + ["-strand", "strand"], + "Query strand(s) to search against database/subject.\n\n" + 'Values allowed are "both" (default), "minus", "plus".', + checker_function=lambda value: value in ["both", "minus", "plus"], + equate=False, + ), + # Input query options: + _Option( + ["-query_gencode", "query_gencode"], + "Genetic code to use to translate query (integer, default 1).", + equate=False, + ), + # General search options: + _Option( + ["-db_gencode", "db_gencode"], + "Genetic code to use to translate query (integer, default 1).", + equate=False, + ), + _Option( + ["-max_intron_length", "max_intron_length"], + "Maximum intron length (integer).\n\n" + "Length of the largest intron allowed in a translated nucleotide " + "sequence when linking multiple distinct alignments (a negative " + "value disables linking). Default zero.", + equate=False, + ), + _Option( + ["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False, + ), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable.\n\n' + 'Default is "12 2.2 2.5"', + equate=False, + ), + ] + _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs) + + +class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline): + """Wrapper for the NCBI BLAST+ program psiblast. + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old blastpgp tool with a similar tool psiblast. This wrapper + therefore replaces BlastpgpCommandline, the wrapper for blastpgp. 
+ + >>> from Bio.Blast.Applications import NcbipsiblastCommandline + >>> cline = NcbipsiblastCommandline(help=True) + >>> cline + NcbipsiblastCommandline(cmd='psiblast', help=True) + >>> print(cline) + psiblast -help + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="psiblast", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option( + ["-matrix", "matrix"], + "Scoring matrix name (default BLOSUM62).", + equate=False, + ), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. True).\n\n" + "0, F or f: no composition-based statistics\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n" + "Note that tblastn also supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft2TtDd", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable. ' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Option( + ["-gap_trigger", "gap_trigger"], + "Number of bits to trigger gapping (float, default 22).", + equate=False, + ), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + # PSI-BLAST options: + _Option( + ["-num_iterations", "num_iterations"], + "Number of iterations to perform (integer, at least one).\n\n" + "Default is one. 
Incompatible with: remote", + equate=False, + ), + _Option( + ["-out_pssm", "out_pssm"], + "File name to store checkpoint file.", + filename=True, + equate=False, + ), + _Option( + ["-out_ascii_pssm", "out_ascii_pssm"], + "File name to store ASCII version of PSSM.", + filename=True, + equate=False, + ), + _Switch( + ["-save_pssm_after_last_round", "save_pssm_after_last_round"], + "Save PSSM after the last database search.", + ), + _Switch( + ["-save_each_pssm", "save_each_pssm"], + "Save PSSM after each iteration\n\n" + "File name is given in -save_pssm or -save_ascii_pssm options.", + ), + _Option( + ["-in_msa", "in_msa"], + "File name of multiple sequence alignment to restart PSI-BLAST.\n\n" + "Incompatible with: in_pssm, query", + filename=True, + equate=False, + ), + _Option( + ["-msa_master_idx", "msa_master_idx"], + "Index of sequence to use as master in MSA.\n\n" + "Index (1-based) of sequence to use as the master in the multiple " + "sequence alignment. If not specified, the first sequence is used.", + equate=False, + ), + _Option( + ["-in_pssm", "in_pssm"], + "PSI-BLAST checkpoint file.\n\n" + "Incompatible with: in_msa, query, phi_pattern", + filename=True, + equate=False, + ), + # PSSM engine options: + _Option( + ["-pseudocount", "pseudocount"], + "Pseudo-count value used when constructing PSSM.\n\n" + "Integer. 
Default is zero.", + equate=False, + ), + _Option( + ["-inclusion_ethresh", "inclusion_ethresh"], + "E-value inclusion threshold for pairwise alignments (float, default 0.002).", + equate=False, + ), + _Switch( + ["-ignore_msa_master", "ignore_msa_master"], + "Ignore the master sequence when creating PSSM.\n\n" + "Requires: in_msa\n" + "Incompatible with: msa_master_idx, in_pssm, query, query_loc, " + "phi_pattern", + ), + # PHI-BLAST options: + _Option( + ["-phi_pattern", "phi_pattern"], + "File name containing pattern to search.\n\n" + "Incompatible with: in_pssm", + filename=True, + equate=False, + ), + ] + _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = { + "num_iterations": ["remote"], + "in_msa": ["in_pssm", "query"], + "in_pssm": ["in_msa", "query", "phi_pattern"], + "ignore_msa_master": [ + "msa_master_idx", + "in_pssm", + "query", + "query_loc", + "phi_pattern", + ], + } + self._validate_incompatibilities(incompatibles) + _Ncbiblast2SeqCommandline._validate(self) + + +class NcbirpsblastCommandline(_NcbiblastCommandline): + """Wrapper for the NCBI BLAST+ program rpsblast. + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old rpsblast tool with a similar tool of the same name. This + wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast. + + >>> from Bio.Blast.Applications import NcbirpsblastCommandline + >>> cline = NcbirpsblastCommandline(help=True) + >>> cline + NcbirpsblastCommandline(cmd='rpsblast', help=True) + >>> print(cline) + rpsblast -help + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. 
+ """ + + def __init__(self, cmd="rpsblast", **kwargs): + """Initialize the class.""" + # TODO - remove the -word_size argument as per BLAST+ 2.2.30 + # (BLAST team say it should never have been included, since + # the word size is set when building the domain database.) + # This likely means reviewing the class hierarchy again. + self.parameters = [ + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable.' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Restrict search or results: + _Option( + ["-culling_limit", "culling_limit"], + "Hit culling limit (integer).\n\n" + "If the query range of a hit is enveloped by that of at " + "least this many higher-scoring hits, delete the hit. " + "Incompatible with: best_hit_overhang, best_hit_score_edge.", + equate=False, + ), + _Option( + ["-best_hit_overhang", "best_hit_overhang"], + "Best Hit algorithm overhang value (recommended value: 0.1).\n\n" + "Float between 0.0 and 0.5 inclusive. " + "Incompatible with: culling_limit.", + equate=False, + ), + _Option( + ["-best_hit_score_edge", "best_hit_score_edge"], + "Best Hit algorithm score edge value (recommended value: 0.1).\n\n" + "Float between 0.0 and 0.5 inclusive. 
" + "Incompatible with: culling_limit.", + equate=False, + ), + # General search options: + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics.\n\n" + "D or d: default (equivalent to 0)\n\n" + "0 or F or f: Simplified Composition-based statistics as in " + "Bioinformatics 15:1000-1011, 1999\n\n" + "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, " + "2001\n\n" + "Default = 0.", + checker_function=lambda value: value in "Dd0Ff1Tt", + equate=False, + ), + # Misc options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + ] + _NcbiblastCommandline.__init__(self, cmd, **kwargs) + + def _validate(self): + incompatibles = {"culling_limit": ["best_hit_overhang", "best_hit_score_edge"]} + self._validate_incompatibilities(incompatibles) + _NcbiblastCommandline._validate(self) + + +class NcbirpstblastnCommandline(_NcbiblastCommandline): + """Wrapper for the NCBI BLAST+ program rpstblastn. + + With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI + replaced the old rpsblast tool with a similar tool of the same name, and a + separate tool rpstblastn for Translated Reverse Position Specific BLAST. + + >>> from Bio.Blast.Applications import NcbirpstblastnCommandline + >>> cline = NcbirpstblastnCommandline(help=True) + >>> cline + NcbirpstblastnCommandline(cmd='rpstblastn', help=True) + >>> print(cline) + rpstblastn -help + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="rpstblastn", **kwargs): + """Initialize the class.""" + # TODO - remove the -word_size argument as per BLAST+ 2.2.30 + # (BLAST team say it should never have been included, since + # the word size is set when building the domain database.) + # This likely means reviewing the class hierarchy again. 
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Wrapper for the NCBI BLAST+ program blast_formatter.

    Starting with BLAST 2.2.24+ (the BLAST suite rewritten in C++ instead
    of C) the NCBI search tools can write their results as ASN.1, and
    blast_formatter accepts that archive as input in order to convert it
    into the other output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print(cline)
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.

    This wrapper targets blast_formatter as shipped with BLAST 2.2.24+ or
    later, the release in which the NCBI first announced the tool. An
    earlier version existed in BLAST 2.2.23+ (and possibly older releases),
    but it lacked the -archive option (-rid was mandatory instead) and is
    not supported by this wrapper.
    """

    def __init__(self, cmd="blast_formatter", **kwargs):
        """Initialize the class."""
        # Input options
        rid_option = _Option(
            ["-rid", "rid"],
            "BLAST Request ID (RID), not compatible with archive arg.",
            equate=False,
        )
        archive_option = _Option(
            ["-archive", "archive"],
            "Archive file of results, not compatible with rid arg.",
            filename=True,
            equate=False,
        )
        # Restrict search or results
        max_target_seqs_option = _Option(
            ["-max_target_seqs", "max_target_seqs"],
            "Maximum number of aligned sequences to keep.",
            checker_function=lambda value: value >= 1,
            equate=False,
        )
        self.parameters = [rid_option, archive_option, max_target_seqs_option]
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject rid combined with archive, then run the base-class checks."""
        self._validate_incompatibilities({"rid": ["archive"]})
        _NcbibaseblastCommandline._validate(self)
+ + >>> from Bio.Blast.Applications import NcbideltablastCommandline + >>> cline = NcbideltablastCommandline(query="rosemary.pro", db="nr", + ... evalue=0.001, remote=True) + >>> cline + NcbideltablastCommandline(cmd='deltablast', query='rosemary.pro', db='nr', evalue=0.001, remote=True) + >>> print(cline) + deltablast -query rosemary.pro -db nr -evalue 0.001 -remote + + You would typically run the command line with cline() or via the Python + subprocess module, as described in the Biopython tutorial. + """ + + def __init__(self, cmd="deltablast", **kwargs): + """Initialize the class.""" + self.parameters = [ + # General search options: + _Option(["-matrix", "matrix"], "Scoring matrix name (default BLOSUM62)."), + _Option( + ["-threshold", "threshold"], + "Minimum score for words to be added to the BLAST lookup table (float).", + equate=False, + ), + _Option( + ["-comp_based_stats", "comp_based_stats"], + "Use composition-based statistics (string, default 2, i.e. True).\n\n" + "0, F or f: no composition-based statistics.\n\n" + "2, T or t, D or d : Composition-based score adjustment as in " + "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n\n" + "Note that tblastn also supports values of 1 and 3.", + checker_function=lambda value: value in "0Ft2TtDd", + equate=False, + ), + # Query filtering options: + _Option( + ["-seg", "seg"], + "Filter query sequence with SEG (string).\n\n" + 'Format: "yes", "window locut hicut", or "no" to disable. ' + 'Default is "12 2.2 2.5"', + equate=False, + ), + # Extension options: + _Option( + ["-gap_trigger", "gap_trigger"], + "Number of bits to trigger gapping. Default = 22.", + equate=False, + ), + # Miscellaneous options: + _Switch( + ["-use_sw_tback", "use_sw_tback"], + "Compute locally optimal Smith-Waterman alignments?", + ), + # PSI-BLAST options + _Option( + ["-num_iterations", "num_iterations"], + "Number of iterations to perform. 
class NcbimakeblastdbCommandline(AbstractCommandline):
    """Wrapper for the NCBI BLAST+ program makeblastdb.

    This is a wrapper for the NCBI BLAST+ makeblastdb application
    to create BLAST databases. By default, this creates a blast database
    with the same name as the input file. The default output location
    is the same directory as the input.

    >>> from Bio.Blast.Applications import NcbimakeblastdbCommandline
    >>> cline = NcbimakeblastdbCommandline(dbtype="prot",
    ...                                    input_file="NC_005816.faa")
    >>> cline
    NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
    >>> print(cline)
    makeblastdb -dbtype prot -in NC_005816.faa

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="makeblastdb", **kwargs):
        """Initialize the class.

        Arguments:
         - cmd - name or path of the makeblastdb executable
         - kwargs - option values, forwarded to AbstractCommandline

        """
        self.parameters = [
            # Basic input options
            _Switch(
                ["-h", "h"], "Print USAGE and DESCRIPTION; ignore other arguments."
            ),
            _Switch(
                ["-help", "help"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                "ignore other arguments.",
            ),
            _Switch(
                ["-version", "version"],
                "Print version number; ignore other arguments.",
            ),
            # Output configuration options
            _Option(
                ["-out", "out"],
                "Output file for alignment.",
                filename=True,
                equate=False,
            ),
            # makeblastdb specific options
            _Option(
                ["-blastdb_version", "blastdb_version"],
                "Version of BLAST database to be created. "
                "Tip: use BLAST database version 4 on 32 bit CPU. "
                "Default = 5",
                equate=False,
                checker_function=lambda x: x in (4, 5),
            ),
            _Option(
                ["-dbtype", "dbtype"],
                "Molecule type of target db ('nucl' or 'prot').",
                equate=False,
                is_required=True,
                checker_function=lambda x: x in ("nucl", "prot"),
            ),
            _Option(
                ["-in", "input_file"],
                "Input file/database name.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-input_type", "input_type"],
                "Type of the data specified in input_file.\n\n"
                "Default = 'fasta'. Added in BLAST 2.2.26.",
                filename=False,
                equate=False,
                checker_function=self._input_type_checker,
            ),
            _Option(
                ["-title", "title"],
                "Title for BLAST database.",
                filename=False,
                equate=False,
            ),
            _Switch(
                ["-parse_seqids", "parse_seqids"],
                "Option to parse seqid for FASTA input if set.\n\n"
                "For all other input types, seqids are parsed automatically",
            ),
            _Switch(
                ["-hash_index", "hash_index"], "Create index of sequence hash values."
            ),
            _Option(
                ["-mask_data", "mask_data"],
                "Comma-separated list of input files containing masking "
                "data as produced by NCBI masking applications "
                "(e.g. dustmasker, segmasker, windowmasker).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-mask_id", "mask_id"],
                "Comma-separated list of strings to uniquely identify the "
                "masking algorithm.",
                filename=False,
                equate=False,
            ),
            _Option(
                ["-mask_desc", "mask_desc"],
                "Comma-separated list of free form strings to describe "
                "the masking algorithm details.",
                filename=False,
                equate=False,
            ),
            _Switch(["-gi_mask", "gi_mask"], "Create GI indexed masking data."),
            _Option(
                ["-gi_mask_name", "gi_mask_name"],
                "Comma-separated list of masking data output files.",
                filename=False,
                equate=False,
            ),
            _Option(
                ["-max_file_sz", "max_file_sz"],
                "Maximum file size for BLAST database files. Default = '1GB'.",
                filename=False,
                equate=False,
            ),
            _Option(
                ["-logfile", "logfile"],
                "File to which the program log should be redirected.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-taxid", "taxid"],
                "Taxonomy ID to assign to all sequences.",
                filename=False,
                equate=False,
                # True when the value survives a round trip through int(),
                # e.g. 9606 or "9606". Non-numeric input makes int() raise
                # rather than return False.
                checker_function=lambda x: type(x)(int(x)) == x,
            ),
            _Option(
                ["-taxid_map", "taxid_map"],
                "Text file mapping sequence IDs to taxonomy IDs.\n\n"
                # The placeholders are literal text copied from makeblastdb's
                # usage message; they must keep their angle brackets.
                "Format:<SequenceId> <TaxonomyId><newline>",
                filename=True,
                equate=False,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _input_type_checker(self, command):
        """Return True if command is an input type accepted by -input_type."""
        return command in ("asn1_bin", "asn1_txt", "blastdb", "fasta")

    def _validate(self):
        """Check option combinations before the command line is built.

        Raises ValueError when two mutually exclusive options are both set,
        or when an option is set without the option(s) it depends on.
        """
        incompatibles = {
            "mask_id": ["gi_mask"],
            "gi_mask": ["mask_id"],
            "taxid": ["taxid_map"],
        }

        # Copied from _NcbibaseblastCommandline class above.
        # Code repeated here for python2 and 3 compatibility,
        # because this is not a _NcbibaseblastCommandline subclass.
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible." % (a, b))

        # Dependency checks: each option below only makes sense together
        # with the option(s) named in its error message.
        if self.mask_id and not self.mask_data:
            raise ValueError("Option mask_id requires mask_data to be set.")
        if self.mask_desc and not self.mask_id:
            raise ValueError("Option mask_desc requires mask_id to be set.")
        if self.gi_mask and not self.parse_seqids:
            raise ValueError("Option gi_mask requires parse_seqids to be set.")
        if self.gi_mask_name and not (self.mask_data and self.gi_mask):
            raise ValueError(
                "Option gi_mask_name requires mask_data and gi_mask to be set."
            )
        if self.taxid_map and not self.parse_seqids:
            raise ValueError("Option taxid_map requires parse_seqids to be set.")
        AbstractCommandline._validate(self)
def qblast(
    program,
    database,
    sequence,
    url_base=NCBI_BLAST_URL,
    auto_format=None,
    composition_based_statistics=None,
    db_genetic_code=None,
    endpoints=None,
    entrez_query="(none)",
    expect=10.0,
    filter=None,
    gapcosts=None,
    genetic_code=None,
    hitlist_size=50,
    i_thresh=None,
    layout=None,
    lcase_mask=None,
    matrix_name=None,
    nucl_penalty=None,
    nucl_reward=None,
    other_advanced=None,
    perc_ident=None,
    phi_pattern=None,
    query_file=None,
    query_believe_defline=None,
    query_from=None,
    query_to=None,
    searchsp_eff=None,
    service=None,
    threshold=None,
    ungapped_alignment=None,
    word_size=None,
    short_query=None,
    alignments=500,
    alignment_view=None,
    descriptions=500,
    entrez_links_new_window=None,
    expect_low=None,
    expect_high=None,
    format_entrez_query=None,
    format_object=None,
    format_type="XML",
    ncbi_gi=None,
    results_file=None,
    show_overview=None,
    megablast=None,
    template_type=None,
    template_length=None,
):
    """BLAST search using NCBI's QBLAST server or a cloud service provider.

    Supports all parameters of the old qblast API for Put and Get.

    Please note that NCBI uses the new Common URL API for BLAST searches
    on the internet (http://ncbi.github.io/blast-cloud/dev/api.html). Thus,
    some of the parameters used by this function are not (or are no longer)
    officially supported by NCBI. Although they are still functioning, this
    may change in the future.

    The Common URL API (http://ncbi.github.io/blast-cloud/dev/api.html) allows
    doing BLAST searches on cloud servers. To use this feature, please set
    ``url_base='http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi'``
    and ``format_object='Alignment'``. For more details, please see
    https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast

    Some useful parameters:

     - program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
     - database       Which database to search against (e.g. "nr").
     - sequence       The sequence to search.
     - ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
     - descriptions   Number of descriptions to show. Def 500.
     - alignments     Number of alignments to show. Def 500.
     - expect         An expect value cutoff. Def 10.0.
     - matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
     - filter         "none" turns off filtering. Default no filtering
     - format_type    "HTML", "Text", "ASN.1", or "XML". Def. "XML".
     - entrez_query   Entrez query to limit Blast search
     - hitlist_size   Number of hits to return. Default 50
     - megablast      TRUE/FALSE whether to use Mega BLAST algorithm (blastn only)
     - short_query    TRUE/FALSE whether to adjust the search parameters for a
                      short query sequence. Note that this will override
                      manually set parameters like word size and e value. Turns
                      off when sequence length is > 30 residues. Default: None.
     - service        plain, psi, phi, rpsblast, megablast (lower case)

    This function does no checking of the validity of the parameters
    and passes the values to the server as is. More help is available at:
    https://ncbi.github.io/blast-cloud/dev/api.html

    Returns a ``StringIO`` handle holding the decoded result page. The call
    blocks, sleeping between polls, until the server reports the results
    are ready.

    Raises ValueError if ``program`` is not one of the supported programs
    (errors from parsing the server's reply propagate unchanged).
    """
    programs = ["blastn", "blastp", "blastx", "tblastn", "tblastx"]
    if program not in programs:
        raise ValueError(
            "Program specified is %s. Expected one of %s"
            % (program, ", ".join(programs))
        )

    # SHORT_QUERY_ADJUST throws an error when using blastn (wrong parameter
    # assignment from NCBIs side).
    # Thus we set the (known) parameters directly:
    if short_query and program == "blastn":
        short_query = None
        # We only use the 'short-query' parameters for short sequences:
        if len(sequence) < 31:
            expect = 1000
            word_size = 7
            nucl_reward = 1
            filter = None
            lcase_mask = None
            warnings.warn(
                '"SHORT_QUERY_ADJUST" is incorrectly implemented (by NCBI) for blastn.'
                " We bypass the problem by manually adjusting the search parameters."
                " Thus, results may slightly differ from web page searches.",
                BiopythonWarning,
            )

    # Format the "Put" command, which sends search requests to qblast.
    # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node5.html on 9 July 2007
    # Additional parameters are taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node9.html on 8 Oct 2010
    # To perform a PSI-BLAST or PHI-BLAST search the service ("Put" and "Get" commands) must be specified
    # (e.g. psi_blast = NCBIWWW.qblast("blastp", "refseq_protein", input_sequence, service="psi"))
    parameters = [
        ("AUTO_FORMAT", auto_format),
        ("COMPOSITION_BASED_STATISTICS", composition_based_statistics),
        ("DATABASE", database),
        ("DB_GENETIC_CODE", db_genetic_code),
        ("ENDPOINTS", endpoints),
        ("ENTREZ_QUERY", entrez_query),
        ("EXPECT", expect),
        ("FILTER", filter),
        ("GAPCOSTS", gapcosts),
        ("GENETIC_CODE", genetic_code),
        ("HITLIST_SIZE", hitlist_size),
        ("I_THRESH", i_thresh),
        ("LAYOUT", layout),
        ("LCASE_MASK", lcase_mask),
        ("MEGABLAST", megablast),
        ("MATRIX_NAME", matrix_name),
        ("NUCL_PENALTY", nucl_penalty),
        ("NUCL_REWARD", nucl_reward),
        ("OTHER_ADVANCED", other_advanced),
        ("PERC_IDENT", perc_ident),
        ("PHI_PATTERN", phi_pattern),
        ("PROGRAM", program),
        # ('PSSM',pssm), - It is possible to use PSI-BLAST via this API?
        ("QUERY", sequence),
        ("QUERY_FILE", query_file),
        ("QUERY_BELIEVE_DEFLINE", query_believe_defline),
        ("QUERY_FROM", query_from),
        ("QUERY_TO", query_to),
        # ('RESULTS_FILE',...), - Can we use this parameter?
        ("SEARCHSP_EFF", searchsp_eff),
        ("SERVICE", service),
        ("SHORT_QUERY_ADJUST", short_query),
        ("TEMPLATE_TYPE", template_type),
        ("TEMPLATE_LENGTH", template_length),
        ("THRESHOLD", threshold),
        ("UNGAPPED_ALIGNMENT", ungapped_alignment),
        ("WORD_SIZE", word_size),
        ("CMD", "Put"),
    ]
    # Parameters left at None are simply not sent to the server.
    query = [x for x in parameters if x[1] is not None]
    message = urlencode(query).encode()

    # Send off the initial query to qblast.
    # Note the NCBI do not currently impose a rate limit here, other
    # than the request not to make say 50 queries at once using multiple
    # threads.
    request = Request(url_base, message, {"User-Agent": "BiopythonClient"})
    # Close the HTTP response as soon as the reference page has been parsed
    # so the underlying socket is not left open.
    with urlopen(request) as handle:
        # Format the "Get" command, which gets the formatted results from qblast
        # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node6.html on 9 July 2007
        rid, _rtoe = _parse_qblast_ref_page(handle)
    parameters = [
        ("ALIGNMENTS", alignments),
        ("ALIGNMENT_VIEW", alignment_view),
        ("DESCRIPTIONS", descriptions),
        ("ENTREZ_LINKS_NEW_WINDOW", entrez_links_new_window),
        ("EXPECT_LOW", expect_low),
        ("EXPECT_HIGH", expect_high),
        ("FORMAT_ENTREZ_QUERY", format_entrez_query),
        ("FORMAT_OBJECT", format_object),
        ("FORMAT_TYPE", format_type),
        ("NCBI_GI", ncbi_gi),
        ("RID", rid),
        ("RESULTS_FILE", results_file),
        ("SERVICE", service),
        ("SHOW_OVERVIEW", show_overview),
        ("CMD", "Get"),
    ]
    query = [x for x in parameters if x[1] is not None]
    message = urlencode(query).encode()

    # Poll NCBI until the results are ready.
    # https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=DeveloperInfo
    # 1. Do not contact the server more often than once every 10 seconds.
    # 2. Do not poll for any single RID more often than once a minute.
    # 3. Use the URL parameter email and tool, so that the NCBI
    #    can contact you if there is a problem.
    # 4. Run scripts weekends or between 9 pm and 5 am Eastern time
    #    on weekdays if more than 50 searches will be submitted.
    # --
    # Could start with a 10s delay, but expect most short queries
    # will take longer thus at least 70s with delay. Therefore,
    # start with 20s delay, thereafter once a minute.
    delay = 20  # seconds
    while True:
        current = time.time()
        # The time of the previous poll is stored on the function object so
        # that it is shared between calls; treat a missing attribute as
        # "never polled".
        wait = getattr(qblast, "_previous", 0) + delay - current
        if wait > 0:
            time.sleep(wait)
            qblast._previous = current + wait
        else:
            qblast._previous = current
        # delay by at least 60 seconds only if running the request against the public NCBI API
        if delay < 60 and url_base == NCBI_BLAST_URL:
            # Wasn't a quick return, must wait at least a minute
            delay = 60

        request = Request(url_base, message, {"User-Agent": "BiopythonClient"})
        # One response per poll; close each one instead of leaking it.
        with urlopen(request) as handle:
            results = handle.read().decode()

        # Can see an "\n\n" page while results are in progress,
        # if so just wait a bit longer...
        if results == "\n\n":
            continue
        # XML results don't have the Status tag when finished
        if "Status=" not in results:
            break
        i = results.index("Status=")
        j = results.index("\n", i)
        status = results[i + len("Status=") : j].strip()
        if status.upper() == "READY":
            break
    return StringIO(results)
Run scripts weekends or between 9 pm and 5 am Eastern time + # on weekdays if more than 50 searches will be submitted. + # -- + # Could start with a 10s delay, but expect most short queries + # will take longer thus at least 70s with delay. Therefore, + # start with 20s delay, thereafter once a minute. + delay = 20 # seconds + while True: + current = time.time() + wait = qblast._previous + delay - current + if wait > 0: + time.sleep(wait) + qblast._previous = current + wait + else: + qblast._previous = current + # delay by at least 60 seconds only if running the request against the public NCBI API + if delay < 60 and url_base == NCBI_BLAST_URL: + # Wasn't a quick return, must wait at least a minute + delay = 60 + + request = Request(url_base, message, {"User-Agent": "BiopythonClient"}) + handle = urlopen(request) + results = handle.read().decode() + + # Can see an "\n\n" page while results are in progress, + # if so just wait a bit longer... + if results == "\n\n": + continue + # XML results don't have the Status tag when finished + if "Status=" not in results: + break + i = results.index("Status=") + j = results.index("\n", i) + status = results[i + len("Status=") : j].strip() + if status.upper() == "READY": + break + return StringIO(results) + + +qblast._previous = 0 + + +def _parse_qblast_ref_page(handle): + """Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE). + + The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably + 'Request Time of Execution' and RID would be 'Request Identifier'. + """ + s = handle.read().decode() + i = s.find("RID =") + if i == -1: + rid = None + else: + j = s.find("\n", i) + rid = s[i + len("RID =") : j].strip() + + i = s.find("RTOE =") + if i == -1: + rtoe = None + else: + j = s.find("\n", i) + rtoe = s[i + len("RTOE =") : j].strip() + + if not rid and not rtoe: + # Can we reliably extract the error message from the HTML page? + # e.g. 
"Message ID#24 Error: Failed to read the Blast query: + # Nucleotide FASTA provided for protein sequence" + # or "Message ID#32 Error: Query contains no data: Query + # contains no sequence data" + # + # This used to occur inside a

entry: + i = s.find('
') + if i != -1: + msg = s[i + len('
') :].strip() + msg = msg.split("
", 1)[0].split("\n", 1)[0].strip() + if msg: + raise ValueError("Error message from NCBI: %s" % msg) + # In spring 2010 the markup was like this: + i = s.find('

') + if i != -1: + msg = s[i + len('

') :].strip() + msg = msg.split("

", 1)[0].split("\n", 1)[0].strip() + if msg: + raise ValueError("Error message from NCBI: %s" % msg) + # Generic search based on the way the error messages start: + i = s.find("Message ID#") + if i != -1: + # Break the message at the first HTML tag + msg = s[i:].split("<", 1)[0].split("\n", 1)[0].strip() + raise ValueError("Error message from NCBI: %s" % msg) + # We didn't recognise the error layout :( + # print(s) + raise ValueError( + "No RID and no RTOE found in the 'please wait' page, " + "there was probably an error in your request but we " + "could not extract a helpful error message." + ) + elif not rid: + # Can this happen? + raise ValueError( + "No RID found in the 'please wait' page. (although RTOE = %r)" % rtoe + ) + elif not rtoe: + # Can this happen? + raise ValueError( + "No RTOE found in the 'please wait' page. (although RID = %r)" % rid + ) + + try: + return rid, int(rtoe) + except ValueError: + raise ValueError( + "A non-integer RTOE found in the 'please wait' page, %r" % rtoe + ) from None diff --git a/code/lib/Bio/Blast/NCBIXML.py b/code/lib/Bio/Blast/NCBIXML.py new file mode 100644 index 0000000..90e91a9 --- /dev/null +++ b/code/lib/Bio/Blast/NCBIXML.py @@ -0,0 +1,864 @@ +# Copyright 2000 by Bertrand Frottier. All rights reserved. +# Revisions 2005-2006 copyright Michiel de Hoon +# Revisions 2006-2009 copyright Peter Cock +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code to work with the BLAST XML output. + +The BLAST XML DTD file is on the NCBI FTP site at: +ftp://ftp.ncbi.nlm.nih.gov/blast/documents/xml/NCBI_BlastOutput.dtd +""" + +from Bio.Blast import Record +import xml.sax +from xml.sax.handler import ContentHandler + + +class _XMLparser(ContentHandler): + """Generic SAX Parser (PRIVATE). + + Just a very basic SAX parser. 
+ + Redefine the methods startElement, characters and endElement. + """ + + def __init__(self, debug=0): + """Initialize the parser. + + Arguments: + - debug - integer, amount of debug information to print + + """ + self._tag = [] + self._value = "" + self._debug = debug + self._debug_ignore_list = [] + self._method_name_level = 1 + self._method_map = None + + def startElement(self, name, attr): + """Found XML start tag. + + No real need of attr, BLAST DTD doesn't use them + + Arguments: + - name -- name of the tag + - attr -- tag attributes + + """ + self._tag.append(name) + + if len(self._tag) == 1: + # root node + self._on_root_node(name) + return + + # Try to call a method (defined in subclasses) + method = "start_" + self._node_method_name(name) + + # Note could use try / except AttributeError + # BUT I found often triggered by nested errors... + if method in self._method_map: + self._method_map[method]() + if self._debug > 4: + print("NCBIXML: Parsed: " + method) + elif self._debug > 3: + # Doesn't exist (yet) and may want to warn about it + if method not in self._debug_ignore_list: + print("NCBIXML: Ignored: " + method) + self._debug_ignore_list.append(method) + + # We don't care about white space in parent tags like Hsp, + # but that white space doesn't belong to child tags like Hsp_midline + if self._value.strip(): + raise ValueError( + "What should we do with %s before the %r tag?" % (self._value, name) + ) + self._value = "" + + def characters(self, ch): + """Found some text. + + Arguments: + - ch -- characters read + + """ + self._value += ch # You don't ever get the whole string + + def endElement(self, name): + """Found XML end tag. + + Arguments: + - name -- tag name + + """ + # DON'T strip any white space, we may need it e.g. the hsp-midline + + # Try to call a method (defined in subclasses) + method = "end_" + self._node_method_name(name) + + # Note could use try / except AttributeError + # BUT I found often triggered by nested errors... 
+ if method in self._method_map: + self._method_map[method]() + if self._debug > 2: + print("NCBIXML: Parsed: %s %s" % (method, self._value)) + elif self._debug > 1: + # Doesn't exist (yet) and may want to warn about it + if method not in self._debug_ignore_list: + print("NCBIXML: Ignored: %s %s" % (method, self._value)) + self._debug_ignore_list.append(method) + + # Reset character buffer + self._value = "" + + self._tag.pop() + + def _node_method_name(self, name): + if self._method_name_level == 1: + return name + return "/".join(self._tag[-self._method_name_level :]) + + +class BlastParser(_XMLparser): + """Parse XML BLAST data into a Record.Blast object. + + Parses XML output from BLAST (direct use discouraged). + This (now) returns a list of Blast records. + Historically it returned a single Blast record. + You are expected to use this via the parse or read functions. + + All XML 'action' methods are private methods and may be: + + - ``_start_TAG`` called when the start tag is found + - ``_end_TAG`` called when the end tag is found + + """ + + def __init__(self, debug=0): + """Initialize the parser. + + Arguments: + - debug - integer, amount of debug information to print + + """ + # Calling superclass method + _XMLparser.__init__(self, debug) + + self._parser = xml.sax.make_parser() + self._parser.setContentHandler(self) + + # To avoid ValueError: unknown url type: NCBI_BlastOutput.dtd + self._parser.setFeature(xml.sax.handler.feature_validation, 0) + self._parser.setFeature(xml.sax.handler.feature_namespaces, 0) + self._parser.setFeature(xml.sax.handler.feature_external_pes, 0) + self._parser.setFeature(xml.sax.handler.feature_external_ges, 0) + + self._xml_version = 1 + + self.reset() + + def reset(self): + """Reset all the data allowing reuse of the BlastParser() object.""" + self._records = [] + self._header = Record.Header() + self._parameters = Record.Parameters() + self._parameters.filter = None # Maybe I should update the class? 
+ + def _on_root_node(self, name): + if name == "BlastOutput": + self._setup_blast_v1() + elif name == "BlastXML2": + self._setup_blast_v2() + else: + raise ValueError( + "Invalid root node name: %s. Root node should be either" + " BlastOutput or BlastXML2" % name + ) + + def _setup_blast_v1(self): + self._method_map = { + "start_Iteration": self._start_blast_record, + "end_Iteration": self._end_blast_record, + "end_BlastOutput_program": self._set_header_application, + "end_BlastOutput_version": self._set_header_version, + "end_BlastOutput_reference": self._set_header_reference, + "end_BlastOutput_db": self._set_header_database, + "end_BlastOutput_query-ID": self._set_header_query_id, + "end_BlastOutput_query-def": self._set_header_query, + "end_BlastOutput_query-len": self._set_header_query_letters, + "end_Iteration_query-ID": self._set_record_query_id, + "end_Iteration_query-def": self._set_record_query_def, + "end_Iteration_query-len": self._set_record_query_letters, + "end_BlastOutput_hits": self._set_record_hits, + "end_Parameters_matrix": self._set_parameters_matrix, + "end_Parameters_expect": self._set_parameters_expect, + "end_Parameters_sc-match": self._set_parameters_sc_match, + "end_Parameters_sc-mismatch": self._set_parameters_sc_mismatch, + "end_Parameters_gap-open": self._set_parameters_gap_penalties, + "end_Parameters_gap-extend": self._set_parameters_gap_extend, + "end_Parameters_filter": self._set_parameters_filter, + "start_Hit": self._start_hit, + "end_Hit": self._end_hit, + "end_Hit_id": self.set_hit_id, + "end_Hit_def": self.set_hit_def, + "end_Hit_accession": self.set_hit_accession, + "end_Hit_len": self.set_hit_len, + "start_Hsp": self._start_hsp, + "end_Hsp_score": self._set_hsp_score, + "end_Hsp_bit-score": self._set_hsp_bit_score, + "end_Hsp_evalue": self._set_hsp_e_value, + "end_Hsp_query-from": self._set_hsp_query_start, + "end_Hsp_query-to": self._set_hsp_query_end, + "end_Hsp_hit-from": self._set_hsp_hit_from, + "end_Hsp_hit-to": 
self._set_hsp_hit_to, + "end_Hsp_query-frame": self._set_hsp_query_frame, + "end_Hsp_hit-frame": self._set_hsp_hit_frame, + "end_Hsp_identity": self._set_hsp_identity, + "end_Hsp_positive": self._set_hsp_positive, + "end_Hsp_gaps": self._set_hsp_gaps, + "end_Hsp_align-len": self._set_hsp_align_len, + "end_Hsp_qseq": self._set_hsp_query_seq, + "end_Hsp_hseq": self._set_hsp_subject_seq, + "end_Hsp_midline": self._set_hsp_midline, + "end_Statistics_db-num": self._set_statistics_db_num, + "end_Statistics_db-len": self._set_statistics_db_len, + "end_Statistics_hsp-len": self._set_statistics_hsp_len, + "end_Statistics_eff-space": self._set_statistics_eff_space, + "end_Statistics_kappa": self._set_statistics_kappa, + "end_Statistics_lambda": self._set_statistics_lambda, + "end_Statistics_entropy": self._set_statistics_entropy, + } + + def _setup_blast_v2(self): + self._method_name_level = 2 + self._xml_version = 2 + self._method_map = { + "start_report/Report": self._start_blast_record, + "end_report/Report": self._end_blast_record, + "end_Report/program": self._set_header_application, + "end_Report/version": self._set_header_version, + "end_Report/reference": self._set_header_reference, + "end_Target/db": self._set_header_database, + "end_Search/query-id": self._set_record_query_id, + "end_Search/query-title": self._set_record_query_def, + "end_Search/query-len": self._set_record_query_letters, + "end_BlastOutput_hits": self._set_record_hits, + "end_Parameters/matrix": self._set_parameters_matrix, + "end_Parameters/expect": self._set_parameters_expect, + "end_Parameters/sc-match": self._set_parameters_sc_match, + "end_Parameters/sc-mismatch": self._set_parameters_sc_mismatch, + "end_Parameters/gap-open": self._set_parameters_gap_penalties, + "end_Parameters/gap-extend": self._set_parameters_gap_extend, + "end_Parameters/filter": self._set_parameters_filter, + "start_hits/Hit": self._start_hit, + "end_hits/Hit": self._end_hit, + "start_description/HitDescr": 
self._start_hit_descr_item, + "end_description/HitDescr": self._end_hit_descr_item, + "end_HitDescr/id": self._end_description_id, + "end_HitDescr/accession": self._end_description_accession, + "end_HitDescr/title": self._end_description_title, + "end_HitDescr/taxid": self._end_description_taxid, + "end_HitDescr/sciname": self._end_description_sciname, + "end_Hit/len": self.set_hit_len, + "start_hsps/Hsp": self._start_hsp, + "end_hsps/Hsp": self._end_hsp, + "end_Hsp/score": self._set_hsp_score, + "end_Hsp/bit-score": self._set_hsp_bit_score, + "end_Hsp/evalue": self._set_hsp_e_value, + "end_Hsp/query-from": self._set_hsp_query_start, + "end_Hsp/query-to": self._set_hsp_query_end, + "end_Hsp/hit-from": self._set_hsp_hit_from, + "end_Hsp/hit-to": self._set_hsp_hit_to, + "end_Hsp/query-frame": self._set_hsp_query_frame, + "end_Hsp/hit-frame": self._set_hsp_hit_frame, + "end_Hsp/query-strand": self._set_hsp_query_strand, + "end_Hsp/hit-strand": self._set_hsp_hit_strand, + "end_Hsp/identity": self._set_hsp_identity, + "end_Hsp/positive": self._set_hsp_positive, + "end_Hsp/gaps": self._set_hsp_gaps, + "end_Hsp/align-len": self._set_hsp_align_len, + "end_Hsp/qseq": self._set_hsp_query_seq, + "end_Hsp/hseq": self._set_hsp_subject_seq, + "end_Hsp/midline": self._set_hsp_midline, + "end_Statistics/db-num": self._set_statistics_db_num, + "end_Statistics/db-len": self._set_statistics_db_len, + "end_Statistics/hsp-len": self._set_statistics_hsp_len, + "end_Statistics/eff-space": self._set_statistics_eff_space, + "end_Statistics/kappa": self._set_statistics_kappa, + "end_Statistics/lambda": self._set_statistics_lambda, + "end_Statistics/entropy": self._set_statistics_entropy, + } + + def _start_blast_record(self): + """Start interaction (PRIVATE).""" + self._blast = Record.Blast() + + def _end_blast_record(self): + """End interaction (PRIVATE).""" + # We stored a lot of generic "top level" information + # in self._header (an object of type Record.Header) + self._blast.reference 
= self._header.reference + self._blast.date = self._header.date + self._blast.version = self._header.version + self._blast.database = self._header.database + self._blast.application = self._header.application + + # These are required for "old" pre 2.2.14 files + # where only , + # and were used. Now they + # are supplemented/replaced by , + # and + if not hasattr(self._blast, "query") or not self._blast.query: + self._blast.query = self._header.query + if not hasattr(self._blast, "query_id") or not self._blast.query_id: + self._blast.query_id = self._header.query_id + if not hasattr(self._blast, "query_letters") or not self._blast.query_letters: + self._blast.query_letters = self._header.query_letters + + # Hack to record the query length as both the query_letters and + # query_length properties (as in the plain text parser, see + # Bug 2176 comment 12): + self._blast.query_length = self._blast.query_letters + # Perhaps in the long term we should deprecate one, but I would + # prefer to drop query_letters - so we need a transition period + # with both. + + # Hack to record the claimed database size as database_length + # (as well as in num_letters_in_database, see Bug 2176 comment 13): + self._blast.database_length = self._blast.num_letters_in_database + # TODO? Deprecate database_letters next? 
+ + # Hack to record the claimed database sequence count as database_sequences + self._blast.database_sequences = self._blast.num_sequences_in_database + + # Apply the "top level" parameter information + self._blast.matrix = self._parameters.matrix + self._blast.num_seqs_better_e = self._parameters.num_seqs_better_e + self._blast.gap_penalties = self._parameters.gap_penalties + self._blast.filter = self._parameters.filter + self._blast.expect = self._parameters.expect + self._blast.sc_match = self._parameters.sc_match + self._blast.sc_mismatch = self._parameters.sc_mismatch + + # Add to the list + self._records.append(self._blast) + # Clear the object (a new empty one is create in _start_Iteration) + self._blast = None + + if self._debug: + print("NCBIXML: Added Blast record to results") + + # Header + def _set_header_application(self): + """BLAST program, e.g., blastp, blastn, etc. (PRIVATE). + + Save this to put on each blast record object + """ + self._header.application = self._value.upper() + + def _set_header_version(self): + """Version number and date of the BLAST engine (PRIVATE). + + e.g. "BLASTX 2.2.12 [Aug-07-2005]" but there can also be + variants like "BLASTP 2.2.18+" without the date. + + Save this to put on each blast record object + """ + parts = self._value.split() + # TODO - Check the first word starts with BLAST? + + # The version is the second word (field one) + self._header.version = parts[1] + + # Check there is a third word (the date) + if len(parts) >= 3: + if parts[2][0] == "[" and parts[2][-1] == "]": + self._header.date = parts[2][1:-1] + else: + # Assume this is still a date, but without the + # square brackets + self._header.date = parts[2] + + def _set_header_reference(self): + """Record any article reference describing the algorithm (PRIVATE). + + Save this to put on each blast record object + """ + self._header.reference = self._value + + def _set_header_database(self): + """Record the database(s) searched (PRIVATE). 
+ + Save this to put on each blast record object + """ + self._header.database = self._value + + def _set_header_query_id(self): + """Record the identifier of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query_id = self._value + + def _set_header_query(self): + """Record the definition line of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query = self._value + + def _set_header_query_letters(self): + """Record the length of the query (PRIVATE). + + Important in old pre 2.2.14 BLAST, for recent versions + is enough + """ + self._header.query_letters = int(self._value) + + def _set_record_query_id(self): + """Record the identifier of the query (PRIVATE).""" + self._blast.query_id = self._value + + def _set_record_query_def(self): + """Record the definition line of the query (PRIVATE).""" + self._blast.query = self._value + + def _set_record_query_letters(self): + """Record the length of the query (PRIVATE).""" + self._blast.query_letters = int(self._value) + + # def _end_BlastOutput_query_seq(self): + # """The query sequence (PRIVATE).""" + # pass # XXX Missing in Record.Blast ? + + # def _end_BlastOutput_iter_num(self): + # """The psi-blast iteration number (PRIVATE).""" + # pass # XXX TODO PSI + + def _set_record_hits(self): + """Hits to the database sequences, one for every sequence (PRIVATE).""" + self._blast.num_hits = int(self._value) + + # def _end_BlastOutput_message(self): + # """error messages (PRIVATE).""" + # pass # XXX What to do ? 
+ + # Parameters + def _set_parameters_matrix(self): + """Matrix used (-M on legacy BLAST) (PRIVATE).""" + self._parameters.matrix = self._value + + def _set_parameters_expect(self): + """Expect values cutoff (PRIVATE).""" + # NOTE: In old text output there was a line: + # Number of sequences better than 1.0e-004: 1 + # As far as I can see, parameters.num_seqs_better_e + # would take the value of 1, and the expectation + # value was not recorded. + # + # Anyway we should NOT record this against num_seqs_better_e + self._parameters.expect = self._value + + # def _end_Parameters_include(self): + # """Inclusion threshold for a psi-blast iteration (-h) (PRIVATE).""" + # pass # XXX TODO PSI + + def _set_parameters_sc_match(self): + """Match score for nucleotide-nucleotide comparison (-r) (PRIVATE).""" + self._parameters.sc_match = int(self._value) + + def _set_parameters_sc_mismatch(self): + """Mismatch penalty for nucleotide-nucleotide comparison (-r) (PRIVATE).""" + self._parameters.sc_mismatch = int(self._value) + + def _set_parameters_gap_penalties(self): + """Gap existence cost (-G) (PRIVATE).""" + self._parameters.gap_penalties = int(self._value) + + def _set_parameters_gap_extend(self): + """Gap extension cose (-E) (PRIVATE).""" + self._parameters.gap_penalties = ( + self._parameters.gap_penalties, + int(self._value), + ) + + def _set_parameters_filter(self): + """Record filtering options (-F) (PRIVATE).""" + self._parameters.filter = self._value + + # def _end_Parameters_pattern(self): + # """Pattern used for phi-blast search (PRIVATE). + # """ + # pass # XXX TODO PSI + + # def _end_Parameters_entrez_query(self): + # """Entrez query used to limit search (PRIVATE). 
+ # """ + # pass # XXX TODO PSI + + # Hits + def _start_hit(self): + """Start filling records (PRIVATE).""" + self._blast.alignments.append(Record.Alignment()) + self._descr = ( + Record.Description() if self._xml_version == 1 else Record.DescriptionExt() + ) + self._blast.descriptions.append(self._descr) + self._blast.multiple_alignment = [] + self._hit = self._blast.alignments[-1] + + self._descr.num_alignments = 0 + + def _end_hit(self): + """Clear variables (PRIVATE).""" + # Cleanup + self._blast.multiple_alignment = None + self._hit = None + self._descr = None + + def set_hit_id(self): + """Record the identifier of the database sequence (PRIVATE).""" + self._hit.hit_id = self._value + self._hit.title = self._value + " " + + def set_hit_def(self): + """Record the definition line of the database sequence (PRIVATE).""" + self._hit.hit_def = self._value + self._hit.title += self._value + self._descr.title = self._hit.title + + def set_hit_accession(self): + """Record the accession value of the database sequence (PRIVATE).""" + self._hit.accession = self._value + self._descr.accession = self._value + + def set_hit_len(self): + """Record the length of the hit.""" + self._hit.length = int(self._value) + + # HSPs + def _start_hsp(self): + # Note that self._start_Hit() should have been called + # to setup things like self._blast.multiple_alignment + self._hsp = Record.HSP() + self._hsp.positives = None + self._hit.hsps.append(self._hsp) + self._descr.num_alignments += 1 + self._blast.multiple_alignment.append(Record.MultipleAlignment()) + self._mult_al = self._blast.multiple_alignment[-1] + + def _end_hsp(self): + if self._hsp.frame and len(self._hsp.frame) == 1: + self._hsp.frame += (0,) + + # Hsp_num is useless + def _set_hsp_score(self): + """Record the raw score of HSP (PRIVATE).""" + self._hsp.score = float(self._value) + if self._descr.score is None: + self._descr.score = float(self._value) + + def _set_hsp_bit_score(self): + """Record the Bit score of HSP 
(PRIVATE).""" + self._hsp.bits = float(self._value) + if self._descr.bits is None: + self._descr.bits = float(self._value) + + def _set_hsp_e_value(self): + """Record the expect value of the HSP (PRIVATE).""" + self._hsp.expect = float(self._value) + if self._descr.e is None: + self._descr.e = float(self._value) + + def _set_hsp_query_start(self): + """Offset of query at the start of the alignment (one-offset) (PRIVATE).""" + self._hsp.query_start = int(self._value) + + def _set_hsp_query_end(self): + """Offset of query at the end of the alignment (one-offset) (PRIVATE).""" + self._hsp.query_end = int(self._value) + + def _set_hsp_hit_from(self): + """Offset of the database at the start of the alignment (one-offset) (PRIVATE).""" + self._hsp.sbjct_start = int(self._value) + + def _set_hsp_hit_to(self): + """Offset of the database at the end of the alignment (one-offset) (PRIVATE).""" + self._hsp.sbjct_end = int(self._value) + + # def _end_Hsp_pattern_from(self): + # """Start of phi-blast pattern on the query (one-offset) (PRIVATE).""" + # pass # XXX TODO PSI + + # def _end_Hsp_pattern_to(self): + # """End of phi-blast pattern on the query (one-offset) (PRIVATE).""" + # pass # XXX TODO PSI + + def _set_hsp_query_frame(self): + """Frame of the query if applicable (PRIVATE).""" + v = int(self._value) + self._hsp.frame = (v,) + if self._header.application == "BLASTN": + self._hsp.strand = ("Plus" if v > 0 else "Minus",) + + def _set_hsp_hit_frame(self): + """Frame of the database sequence if applicable (PRIVATE).""" + v = int(self._value) + if len(self._hsp.frame) == 0: + self._hsp.frame = (0, v) + else: + self._hsp.frame += (v,) + if self._header.application == "BLASTN": + self._hsp.strand += ("Plus" if v > 0 else "Minus",) + + def _set_hsp_query_strand(self): + """Frame of the query if applicable (PRIVATE).""" + self._hsp.strand = (self._value,) + if self._header.application == "BLASTN": + self._hsp.frame = (1 if self._value == "Plus" else -1,) + + def 
_set_hsp_hit_strand(self): + """Frame of the database sequence if applicable (PRIVATE).""" + self._hsp.strand += (self._value,) + if self._header.application == "BLASTN": + self._hsp.frame += (1 if self._value == "Plus" else -1,) + + def _set_hsp_identity(self): + """Record the number of identities in the alignment (PRIVATE).""" + v = int(self._value) + self._hsp.identities = v + if self._hsp.positives is None: + self._hsp.positives = v + + def _set_hsp_positive(self): + """Record the number of positive (conservative) substitutions in the alignment (PRIVATE).""" + self._hsp.positives = int(self._value) + + def _set_hsp_gaps(self): + """Record the number of gaps in the alignment (PRIVATE).""" + self._hsp.gaps = int(self._value) + + def _set_hsp_align_len(self): + """Record the length of the alignment (PRIVATE).""" + self._hsp.align_length = int(self._value) + + # def _en_Hsp_density(self): + # """Score density (PRIVATE).""" + # pass # XXX ??? + + def _set_hsp_query_seq(self): + """Record the alignment string for the query (PRIVATE).""" + self._hsp.query = self._value + + def _set_hsp_subject_seq(self): + """Record the alignment string for the database (PRIVATE).""" + self._hsp.sbjct = self._value + + def _set_hsp_midline(self): + """Record the middle line as normally seen in BLAST report (PRIVATE).""" + self._hsp.match = self._value # do NOT strip spaces! 
+ assert len(self._hsp.match) == len(self._hsp.query) + assert len(self._hsp.match) == len(self._hsp.sbjct) + + # Statistics + def _set_statistics_db_num(self): + """Record the number of sequences in the database (PRIVATE).""" + self._blast.num_sequences_in_database = int(self._value) + + def _set_statistics_db_len(self): + """Record the number of letters in the database (PRIVATE).""" + self._blast.num_letters_in_database = int(self._value) + + def _set_statistics_hsp_len(self): + """Record the effective HSP length (PRIVATE).""" + self._blast.effective_hsp_length = int(self._value) + + def _set_statistics_eff_space(self): + """Record the effective search space (PRIVATE).""" + self._blast.effective_search_space = float(self._value) + + def _set_statistics_kappa(self): + """Karlin-Altschul parameter K (PRIVATE).""" + self._blast.ka_params = float(self._value) + + def _set_statistics_lambda(self): + """Karlin-Altschul parameter Lambda (PRIVATE).""" + self._blast.ka_params = (float(self._value), self._blast.ka_params) + + def _set_statistics_entropy(self): + """Karlin-Altschul parameter H (PRIVATE).""" + self._blast.ka_params = self._blast.ka_params + (float(self._value),) + + def _start_hit_descr_item(self): + """XML v2. Start hit description item.""" + self._hit_descr_item = Record.DescriptionExtItem() + + def _end_hit_descr_item(self): + """XML v2. Start hit description item.""" + self._descr.append_item(self._hit_descr_item) + if not self._hit.title: + self._hit.title = str(self._hit_descr_item) + self._hit_descr_item = None + + def _end_description_id(self): + """XML v2. The identifier of the database sequence(PRIVATE).""" + self._hit_descr_item.id = self._value + if not self._hit.hit_id: + self._hit.hit_id = self._value + + def _end_description_accession(self): + """XML v2. 
The accession value of the database sequence (PRIVATE).""" + self._hit_descr_item.accession = self._value + if not getattr(self._hit, "accession", None): + self._hit.accession = self._value + + def _end_description_title(self): + """XML v2. The hit description title (PRIVATE).""" + self._hit_descr_item.title = self._value + + def _end_description_taxid(self): + try: + self._hit_descr_item.taxid = int(self._value) + except ValueError: + pass + + def _end_description_sciname(self): + self._hit_descr_item.sciname = self._value + + +def read(handle, debug=0): + """Return a single Blast record (assumes just one query). + + Uses the BlastParser internally. + + This function is for use when there is one and only one BLAST + result in your XML file. + + Use the Bio.Blast.NCBIXML.parse() function if you expect more than + one BLAST record (i.e. if you have more than one query sequence). + """ + iterator = parse(handle, debug) + try: + record = next(iterator) + except StopIteration: + raise ValueError("No records found in handle") from None + try: + next(iterator) + raise ValueError("More than one record found in handle") + except StopIteration: + pass + return record + + +def parse(handle, debug=0): + """Return an iterator a Blast record for each query. + + Incremental parser, this is an iterator that returns + Blast records. It uses the BlastParser internally. + + handle - file handle to and XML file to parse + debug - integer, amount of debug information to print + + This is a generator function that returns multiple Blast records + objects - one for each query sequence given to blast. The file + is read incrementally, returning complete records as they are read + in. + + Should cope with new BLAST 2.2.14+ which gives a single XML file + for multiple query records. + + Should also cope with XML output from older versions BLAST which + gave multiple XML files concatenated together (giving a single file + which strictly speaking wasn't valid XML). 
+ """ + from xml.parsers import expat + + BLOCK = 1024 + MARGIN = 10 # must be at least length of newline + XML start + XML_START = ""): + """Ensure the given value formats to a string correctly.""" + if value is None: + return default_str + return format_spec % value + + +class Header: + """Saves information from a blast header. + + Members: + application The name of the BLAST flavor that generated this data. + version Version of blast used. + date Date this data was generated. + reference Reference for blast. + + query Name of query sequence. + query_letters Number of letters in the query sequence. (int) + + database Name of the database. + database_sequences Number of sequences in the database. (int) + database_letters Number of letters in the database. (int) + + """ + + def __init__(self): + """Initialize the class.""" + self.application = "" + self.version = "" + self.date = "" + self.reference = "" + + self.query = "" + self.query_letters = None + + self.database = "" + self.database_sequences = None + self.database_letters = None + + +class Description: + """Stores information about one hit in the descriptions section. + + Members: + title Title of the hit. + score Number of bits. (int) + bits Bit score. (float) + e E value. (float) + num_alignments Number of alignments for the same subject. (int) + """ + + def __init__(self): + """Initialize the class.""" + self.title = "" + self.score = None + self.bits = None + self.e = None + self.num_alignments = None + + def __str__(self): + """Return the description as a string.""" + return "%-66s %5s %s" % (self.title, self.score, self.e) + + +class DescriptionExt(Description): + """Extended description record for BLASTXML version 2. 
+ + Members: + items List of DescriptionExtItem + """ + + def __init__(self): + """Initialize the class.""" + super().__init__() + + self.items = [] + + def append_item(self, item): + """Add a description extended record.""" + if len(self.items) == 0: + self.title = str(item) + self.items.append(item) + + +class DescriptionExtItem: + """Stores information about one record in hit description for BLASTXML version 2. + + Members: + id Database identifier + title Title of the hit. + """ + + def __init__(self): + """Initialize the class.""" + self.id = None + self.title = None + self.accession = None + self.taxid = None + self.sciname = None + + def __str__(self): + """Return the description identifier and title as a string.""" + return "%s %s" % (self.id, self.title) + + +class Alignment: + """Stores information about one hit in the alignments section. + + Members: + title Name. + hit_id Hit identifier. (str) + hit_def Hit definition. (str) + length Length. (int) + hsps A list of HSP objects. + + """ + + def __init__(self): + """Initialize the class.""" + self.title = "" + self.hit_id = "" + self.hit_def = "" + self.length = None + self.hsps = [] + + def __str__(self): + """Return the BLAST alignment as a formatted string.""" + lines = self.title.split("\n") + lines.append("Length = %s\n" % self.length) + return "\n ".join(lines) + + +class HSP: + """Stores information about one hsp in an alignment hit. + + Members: + - score BLAST score of hit. (float) + - bits Number of bits for that score. (float) + - expect Expect value. (float) + - num_alignments Number of alignments for same subject. (int) + - identities Number of identities (int) if using the XML parser. + Tuple of number of identities/total aligned (int, int) + if using the (obsolete) plain text parser. + - positives Number of positives (int) if using the XML parser. + Tuple of number of positives/total aligned (int, int) + if using the (obsolete) plain text parser. 
+ - gaps Number of gaps (int) if using the XML parser. + Tuple of number of gaps/total aligned (int, int) if + using the (obsolete) plain text parser. + - align_length Length of the alignment. (int) + - strand Tuple of (query, target) strand. + - frame Tuple of 1 or 2 frame shifts, depending on the flavor. + + - query The query sequence. + - query_start The start residue for the query sequence. (1-based) + - query_end The end residue for the query sequence. (1-based) + - match The match sequence. + - sbjct The sbjct sequence. + - sbjct_start The start residue for the sbjct sequence. (1-based) + - sbjct_end The end residue for the sbjct sequence. (1-based) + + Not all flavors of BLAST return values for every attribute:: + + score expect identities positives strand frame + BLASTP X X X X + BLASTN X X X X X + BLASTX X X X X X + TBLASTN X X X X X + TBLASTX X X X X X/X + + Note: for BLASTX, the query sequence is shown as a protein sequence, + but the numbering is based on the nucleotides. Thus, the numbering + is 3x larger than the number of amino acid residues. A similar effect + can be seen for the sbjct sequence in TBLASTN, and for both sequences + in TBLASTX. + + Also, for negative frames, the sequence numbering starts from + query_start and counts down. 
+ + """ + + def __init__(self): + """Initialize the class.""" + self.score = None + self.bits = None + self.expect = None + self.num_alignments = None + self.identities = (None, None) + self.positives = (None, None) + self.gaps = (None, None) + self.align_length = None + self.strand = (None, None) + self.frame = () + + self.query = "" + self.query_start = None + self.query_end = None + self.match = "" + self.sbjct = "" + self.sbjct_start = None + self.sbjct_end = None + + def __str__(self): + """Return the BLAST HSP as a formatted string.""" + lines = [ + "Score %s (%s bits), expectation %s, alignment length %s" + % ( + fmt_(self.score, "%i"), + fmt_(self.bits, "%i"), + fmt_(self.expect, "%0.1e"), + fmt_(self.align_length, "%i"), + ) + ] + if self.align_length is None: + return "\n".join(lines) + if self.align_length < 50: + lines.append( + "Query:%8s %s %s" % (self.query_start, self.query, self.query_end) + ) + lines.append(" %s" % self.match) + lines.append( + "Sbjct:%8s %s %s" % (self.sbjct_start, self.sbjct, self.sbjct_end) + ) + else: + lines.append( + "Query:%8s %s...%s %s" + % (self.query_start, self.query[:45], self.query[-3:], self.query_end,) + ) + lines.append(" %s...%s" % (self.match[:45], self.match[-3:])) + lines.append( + "Sbjct:%8s %s...%s %s" + % (self.sbjct_start, self.sbjct[:45], self.sbjct[-3:], self.sbjct_end) + ) + return "\n".join(lines) + + +class MultipleAlignment: + """Holds information about a multiple alignment. + + Members: + alignment A list of tuples (name, start residue, sequence, end residue). + + The start residue is 1-based. It may be blank, if that sequence is + not aligned in the multiple alignment. + + """ + + def __init__(self): + """Initialize the class.""" + self.alignment = [] + + def to_generic(self): + """Retrieve generic alignment object for the given alignment. + + Instead of the tuples, this returns a MultipleSeqAlignment object + from Bio.Align, through which you can manipulate and query + the object. 
+ + Thanks to James Casbon for the code. + """ + seq_parts = [] + seq_names = [] + parse_number = 0 + n = 0 + for name, start, seq, end in self.alignment: + if name == "QUERY": # QUERY is the first in each alignment block + parse_number += 1 + n = 0 + + if parse_number == 1: # create on first_parse, append on all others + seq_parts.append(seq) + seq_names.append(name) + else: + seq_parts[n] += seq + n += 1 + + records = ( + SeqRecord(Seq(seq), name) for (name, seq) in zip(seq_names, seq_parts) + ) + return MultipleSeqAlignment(records) + + +class Round: + """Holds information from a PSI-BLAST round. + + Members: + number Round number. (int) + reused_seqs Sequences in model, found again. List of Description objects. + new_seqs Sequences not found, or below threshold. List of Description. + alignments A list of Alignment objects. + multiple_alignment A MultipleAlignment object. + """ + + def __init__(self): + """Initialize the class.""" + self.number = None + self.reused_seqs = [] + self.new_seqs = [] + self.alignments = [] + self.multiple_alignment = None + + +class DatabaseReport: + """Holds information about a database report. + + Members: + database_name List of database names. (can have multiple dbs) + num_letters_in_database Number of letters in the database. (int) + num_sequences_in_database List of number of sequences in the database. + posted_date List of the dates the databases were posted. + ka_params A tuple of (lambda, k, h) values. (floats) + gapped # XXX this isn't set right! + ka_params_gap A tuple of (lambda, k, h) values. (floats) + + """ + + def __init__(self): + """Initialize the class.""" + self.database_name = [] + self.posted_date = [] + self.num_letters_in_database = [] + self.num_sequences_in_database = [] + self.ka_params = (None, None, None) + self.gapped = 0 + self.ka_params_gap = (None, None, None) + + +class Parameters: + """Holds information about the parameters. + + Members: + matrix Name of the matrix. 
+ gap_penalties Tuple of (open, extend) penalties. (floats) + sc_match Match score for nucleotide-nucleotide comparison + sc_mismatch Mismatch penalty for nucleotide-nucleotide comparison + num_hits Number of hits to the database. (int) + num_sequences Number of sequences. (int) + num_good_extends Number of extensions. (int) + num_seqs_better_e Number of sequences better than e-value. (int) + hsps_no_gap Number of HSP's better, without gapping. (int) + hsps_prelim_gapped Number of HSP's gapped in prelim test. (int) + hsps_prelim_gapped_attemped Number of HSP's attempted in prelim. (int) + hsps_gapped Total number of HSP's gapped. (int) + query_length Length of the query. (int) + query_id Identifier of the query sequence. (str) + database_length Number of letters in the database. (int) + effective_hsp_length Effective HSP length. (int) + effective_query_length Effective length of query. (int) + effective_database_length Effective length of database. (int) + effective_search_space Effective search space. (int) + effective_search_space_used Effective search space used. (int) + frameshift Frameshift window. Tuple of (int, float) + threshold Threshold. (int) + window_size Window size. (int) + dropoff_1st_pass Tuple of (score, bits). (int, float) + gap_x_dropoff Tuple of (score, bits). (int, float) + gap_x_dropoff_final Tuple of (score, bits). (int, float) + gap_trigger Tuple of (score, bits). (int, float) + blast_cutoff Tuple of (score, bits). 
(int, float) + """ + + def __init__(self): + """Initialize the class.""" + self.matrix = "" + self.gap_penalties = (None, None) + self.sc_match = None + self.sc_mismatch = None + self.num_hits = None + self.num_sequences = None + self.num_good_extends = None + self.num_seqs_better_e = None + self.hsps_no_gap = None + self.hsps_prelim_gapped = None + self.hsps_prelim_gapped_attemped = None + self.hsps_gapped = None + self.query_id = None + self.query_length = None + self.database_length = None + self.effective_hsp_length = None + self.effective_query_length = None + self.effective_database_length = None + self.effective_search_space = None + self.effective_search_space_used = None + self.frameshift = (None, None) + self.threshold = None + self.window_size = None + self.dropoff_1st_pass = (None, None) + self.gap_x_dropoff = (None, None) + self.gap_x_dropoff_final = (None, None) + self.gap_trigger = (None, None) + self.blast_cutoff = (None, None) + + +# TODO - Add a friendly __str__ method to BLAST results +class Blast(Header, DatabaseReport, Parameters): + """Saves the results from a blast search. + + Members: + descriptions A list of Description objects. + alignments A list of Alignment objects. + multiple_alignment A MultipleAlignment object. + + members inherited from base classes + + """ + + def __init__(self): + """Initialize the class.""" + Header.__init__(self) + DatabaseReport.__init__(self) + Parameters.__init__(self) + self.descriptions = [] + self.alignments = [] + self.multiple_alignment = None + + +class PSIBlast(Header, DatabaseReport, Parameters): + """Saves the results from a blastpgp search. + + Members: + rounds A list of Round objects. + converged Whether the search converged. 
+ + members inherited from base classes + + """ + + def __init__(self): + """Initialize the class.""" + Header.__init__(self) + DatabaseReport.__init__(self) + Parameters.__init__(self) + self.rounds = [] + self.converged = 0 diff --git a/code/lib/Bio/Blast/__init__.py b/code/lib/Bio/Blast/__init__.py new file mode 100644 index 0000000..27c0ec3 --- /dev/null +++ b/code/lib/Bio/Blast/__init__.py @@ -0,0 +1,7 @@ +# Copyright 1999 by Jeffrey Chang. All rights reserved. +# +# This file is part of the Biopython distribution and governed by your +# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". +# Please see the LICENSE file that should have been included as part of this +# package. +"""Code for dealing with BLAST programs and output.""" diff --git a/code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc b/code/lib/Bio/Blast/__pycache__/Applications.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44e931416d8260119a551f8383f2792734da8aef GIT binary patch literal 47345 zcmeHwTWlOxnqFVXX0u62)WsLcwp^y|Avt2VNa|vX5@l1=#hkHdjYwObj%RwCT}85` zW;b2cw8&a{XEUDNYvV3VgT&<}Y@fWSZ= z0^}vhLy+(L&pB1q&1zm`jYpXo*{rT}>eRW_Isf_o%lXf>Lx*w+{JWSKto-}lWa7Ut zVE*;s=573WuO$-+J5fy7Njp_buBVEr^>i`4-c#&JCT=C{wB7Tggx%w$KT8*T5$?4! z2xpvLgfj^D*;$0MP6pvVg!}Cr!a1i8;Vi-f_8`K8P8Q*Qgb&$=5k4&89KuKJA%uq{ zJb>^~`xwHdr%JVeZ613S8V=zVB3;?Kw21svRYpDysJ*8SawBSE|K^ zr@E}YC3hO0e9ipghEu7oR4djzKd*c|`IM&j+zn5f(f;~f=M#KG$1>SiK{lN)$96)b z+561Nc52OXOhr1syxmwYW5_u5wQ{|Jfmmxa9&Bz{&FZ>il`9pehEdA_Dp{E}N&usJ zy^JB}wzqVyQAbmo*6iX!VG@JWYGBa1>O)(N&F02tlM@23dM3N|@&iY*w3plSnhj?K zbE>!w>Y}r<%V=NObG%&cCg#`5rYq&xPP1ICdE=JnIG6)-<@Iu9tzkJ$t6VFnSIsFQO5yv)({VbFJ!G z>kWIe=5WfcY_cljQkQDoYdU2cDJz(?PTkHeZ&qtIDaUqW)3q2Ybw<6b^-mfP_@vsj zKB<3KX|1dXb! 
zRhZiROXKEk{CUH;IEi8st3k?67So{Zr2ffbkD~g;UONRkm~EXb3H`xG>Kusvf4hcQ zD^jM_Py_*8ZjFM(jkVFZhlU7_-nl#f!K|W;7XB@<8}UCH{E751-$w^jK=eHH%Q!p6 zcFLfTLUp~hvWxG~kj7&G;vc)SQRk;A*J{#%iZpAwDpjwKTV`HI@0?~uvY1`Ct|mxH zjTlS{kZxwaP)VV;_>T+eA^d$Cf8Kw`k<@_7$phU}w=ITpkuxey}e8A<=F{T%L}N9vQGr;+-U{Q|-- z24O662*0euN!zktL3mioebs&q_pg7JS{?W_y*>0O{U~ui{aoTvqLjXusGsUdGt^9Ze5Nj*yay(AXJ1O~%57W`Cxt(9E!FWhml7FzEw z&fdBq!~Ob=#ksrlcb4YgTexgl)zx|fb5aqFvb(y83DEQkes6xwscp1+c=f*>$z$A# zWFQ&N-o5qy?HdbAi}>(f#jVP>zT7qEY@VcY)_yv`}4qEli^<~H1rE)!a5Vlqt z7ys0In#5Hxu{jvaH8hEhC@kF^OSQ5qcu+5|JFSfK@g~Tb-^Z6%)|O*1Uo# z?K~`Cly7eU9Mo#tXrCa7RZ!H4{N#CSSy5BVYk*#>!RJ{Oq*?}0Kq)qccWrYWAf#4( z;8-IUt&ctD9_tqK@ORH6{m^F9)sYS%8seEf5GqsLyvO*Bzz2zUxqhm{vZ{!t+I%E7F zFG%NGjTOsKe315!Hyx~29w#k}vKnpR_Lm-~ zwpf+A*VZ4DE-7KStv1(M1MD9i@PGSV;z0F8KB1;tTuH)qS))X)=sT=SrpqSB`30rB zE=~}iE;O*Ts&#r<4|z3?t*h4gf;DS%-NSbmqI$-fDohp5O~`z1uGcW1arGyFO6<*w zW4*H!48ga;&%i}Z=cx#H0iR%_3_OL=qefHlaC_LF!ob_CVa3I}7%eWR4o1C(o%*#= zZ;TT~UbWvE>uVjJ#k_F2Fy3(8hU=1OyQJW)91>wZfwFr@)oi*|Og%@X;>y^{mP!Qt zrP5eWtH*O{E3I^8-EL(bJjB5BT0_njmf2F6%bzCPXYt*Y$LZkNw}O2hFZLD6cx+M*t{iuMygxji4m2u(9nP zMYaX}c}ut?l7s0~GM7pvGszy@nQJPEdkLp{csIy!GL_Cq%J$!!q|2sL>7nG|)L_yd z?z*rSOfR&pdl}SYjx~A*m#u4$lGhX8oBlNQDDg$&QR+{UZf5J7kCKm4CliQ2{VCQG z#HSH|n(N}D)T8uD>e<9+$sZ;^K7B9oapqn^07lxkKfquGS&&btzm5#&Y8|Wk2H2_P zY7HnIl%f+jaB)Vr1K4iM7%6>XvcgARYuIY}W-9+x1(iJObx#Hnf3QS_Wz?L)0!aJe z(yG%8zi=yu+HI;KGS=fBL(^M_qAz)^WVw}GmSLbS&oRvlK~*rII-0S?P84RnH=@CExK z!fyoOH|x)V4TNt7;am1Q2+s%MZ`t2Q z_}w6U+g?EUy-4fs*xy0S-AK%$y@Z(e^*bs11N$Dr9|BFk7=xx?RA?H4r2o55JpJ7T zKa-brt<@_({)*=($=6#QKNVZ*gcUe$Ms{lTsO<~_t(j1_9n2avYe1bMa9 z1_WVi>ml$fShuk-uWzmcd-#X)USp+MS}%L#nu~+ckbjLU>9z%gj%A)u2%Vxua%B$? 
zd!=ddMJy)QAaird;0g_NoUxkjrUMjwIA5!PwGB!*)E4xoiMDHvho~x(Malw1+KE$4 z*8`}$g^pXrYUVXt8TIRbe%Ar-UWFLVQ{OrYlF{)@OyGQLe#DdhFsli08oA+V$P!o4 zz)u{Yb|$9O1k8cD4W32fD7pDJQ0=`#kFdFi&c6Or(UWmh2G1BVR4S7`$ z_E8rU3(_f1_m9k=k1LH&N^9Om33BgB@ladP|JEHuqClMil0rhc3IvELg?Fscm0F{W zkvu^fI5{CaN;f8CD8F9bDuI|0xS{c0>qtk0fBklOONa-!H3$JXorFQcc*F}K8~VU; zQ1(uK37jd~)Cb}yL@c9z{WCYfUPBuINucJ!kTLrxAe<&I#aYYz_BOy->pTB!w;VJr z@W3fJL5+~hGtyXek;i^5=_y=1!4`MM`U~%&TU|)hK=HP;0EBISXYme%ZRiKXNqG=& z+Cqgl>#N`crC+%45OeG9kS3U7idl2#-(G}RHYR_SXIm+{t8=;QhWnhb)5a*u z^EaiXJ7HP8QxYFtTDF%GogqqePRFv@p)?1|fBDDG4Wv2WLV$V;JG}}8&?+?;qQ`*t zB5h}W+Vg2rBss}HNkT)AxSw>d;F|KTG@&x0G^g3~Mem~opNftkyWmoQ;EwQ;=VhE1 zin8$8_~V}8MG}qT&Of6hGtgd!Xatk04N+)aPhl*hWEW%1LNYtaJCWV>2O^iTln83v zQ+(DjD$XMA5<0@;kV>So$sYW7IF(BdCbLQZ-+T4kOZ)at5vZ|3$#!ZElb>lVxZ= zY!KNCC{r{|ba10ag=aMT<4^}zAo+#vqz>t|(d=1RgmJGG@NxP(O8-SJ^KWp|T`nV1 zn2g;Ajz3G8fP`W2f-qz=h&d#38zYx7DU4jkgpFLrgtc5o<+$1x$PrSal}l-@TpC)r zKZ{5aQd%q5b02f9Lo0V`!O!La3>q6wz1XL3{Fmk$;2W_#G1@9~S&uBVH;on~$2SE| z*v0W&tsd=oA0=us-s;mg{uhf{E^odb1@ovtWJTJsNZ8BcU?>!;ir02)QI zCJGviQkN7MsEn=t3wy88j^r0m?umq5m&CMJW1Vz${p?LX_BL z@aNM)!`fk;?`$fi7nRK#6h*5g!PKooZ9)Ivz6127qZI0n3=FhEu`o)E0m~n7Wn`BK z?5JCVGQ+81Olg@rR7!km6idn=qFmob@>N)j